1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/CodeGenOptions.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/Frontend/OpenMP/OMPGridValues.h"
21#include "llvm/IR/DataLayout.h"
22
23using namespace clang;
24using namespace clang::targets;
25
// File-level data used by AMDGPUTargetInfo: the two data layout description
// strings and the two language-to-target address space maps.
namespace clang {
namespace targets {

// If you edit the description strings, make sure you update
// getPointerWidthV().

// Data layout description installed by the AMDGPUTargetInfo constructor when
// the triple is not amdgcn (the R600 case).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";

// Data layout description installed by the AMDGPUTargetInfo constructor when
// the triple is amdgcn. For either string the constructor asserts that the
// resulting alloca address space equals Private and takes the default pointer
// width from the parsed layout.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
    "-ni:7";

// Address space map in which the Default language address space maps to
// Generic. The trailing comment on each entry names the language address space
// that entry is the mapping for. Selected by
// setAddressSpaceMap(/*DefaultIsPrivate=*/false).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};

// Address space map in which the Default language address space maps to
// Private; every other entry is the same as in AMDGPUDefIsGenMap. Selected by
// setAddressSpaceMap(/*DefaultIsPrivate=*/true).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
} // namespace targets
} // namespace clang
74
// Table of AMDGPU builtins. Each BUILTIN / TARGET_BUILTIN entry of
// BuiltinsAMDGPU.def is expanded into one Builtin::Info initializer by the
// macros defined below. getTargetBuiltins() returns a view of this table.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
// Entry without a FEATURE argument: the last initializer field is null.
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
// Entry with a FEATURE argument: FEATURE is stored in the last initializer
// field.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
82
// Register name table returned by getGCCRegNames(). In order it lists
// v0..v255, s0..s127, the named registers exec, vcc, scc, m0 and flat_scratch,
// the *_lo / *_hi names for exec, vcc and flat_scratch, and finally a0..a255.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
159
160ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
161  return llvm::makeArrayRef(GCCRegNames);
162}
163
164bool AMDGPUTargetInfo::initFeatureMap(
165    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
166    const std::vector<std::string> &FeatureVec) const {
167
168  using namespace llvm::AMDGPU;
169
170  // XXX - What does the member GPU mean if device name string passed here?
171  if (isAMDGCN(getTriple())) {
172    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
173    case GK_GFX1030:
174      Features["ci-insts"] = true;
175      Features["dot1-insts"] = true;
176      Features["dot2-insts"] = true;
177      Features["dot5-insts"] = true;
178      Features["dot6-insts"] = true;
179      Features["dl-insts"] = true;
180      Features["flat-address-space"] = true;
181      Features["16-bit-insts"] = true;
182      Features["dpp"] = true;
183      Features["gfx8-insts"] = true;
184      Features["gfx9-insts"] = true;
185      Features["gfx10-insts"] = true;
186      Features["gfx10-3-insts"] = true;
187      Features["s-memrealtime"] = true;
188      break;
189    case GK_GFX1012:
190    case GK_GFX1011:
191      Features["dot1-insts"] = true;
192      Features["dot2-insts"] = true;
193      Features["dot5-insts"] = true;
194      Features["dot6-insts"] = true;
195      LLVM_FALLTHROUGH;
196    case GK_GFX1010:
197      Features["dl-insts"] = true;
198      Features["ci-insts"] = true;
199      Features["flat-address-space"] = true;
200      Features["16-bit-insts"] = true;
201      Features["dpp"] = true;
202      Features["gfx8-insts"] = true;
203      Features["gfx9-insts"] = true;
204      Features["gfx10-insts"] = true;
205      Features["s-memrealtime"] = true;
206      break;
207    case GK_GFX908:
208      Features["dot3-insts"] = true;
209      Features["dot4-insts"] = true;
210      Features["dot5-insts"] = true;
211      Features["dot6-insts"] = true;
212      Features["mai-insts"] = true;
213      LLVM_FALLTHROUGH;
214    case GK_GFX906:
215      Features["dl-insts"] = true;
216      Features["dot1-insts"] = true;
217      Features["dot2-insts"] = true;
218      LLVM_FALLTHROUGH;
219    case GK_GFX909:
220    case GK_GFX904:
221    case GK_GFX902:
222    case GK_GFX900:
223      Features["gfx9-insts"] = true;
224      LLVM_FALLTHROUGH;
225    case GK_GFX810:
226    case GK_GFX803:
227    case GK_GFX802:
228    case GK_GFX801:
229      Features["gfx8-insts"] = true;
230      Features["16-bit-insts"] = true;
231      Features["dpp"] = true;
232      Features["s-memrealtime"] = true;
233      LLVM_FALLTHROUGH;
234    case GK_GFX704:
235    case GK_GFX703:
236    case GK_GFX702:
237    case GK_GFX701:
238    case GK_GFX700:
239      Features["ci-insts"] = true;
240      Features["flat-address-space"] = true;
241      LLVM_FALLTHROUGH;
242    case GK_GFX601:
243    case GK_GFX600:
244      break;
245    case GK_NONE:
246      break;
247    default:
248      llvm_unreachable("Unhandled GPU!");
249    }
250  } else {
251    if (CPU.empty())
252      CPU = "r600";
253
254    switch (llvm::AMDGPU::parseArchR600(CPU)) {
255    case GK_CAYMAN:
256    case GK_CYPRESS:
257    case GK_RV770:
258    case GK_RV670:
259      // TODO: Add fp64 when implemented.
260      break;
261    case GK_TURKS:
262    case GK_CAICOS:
263    case GK_BARTS:
264    case GK_SUMO:
265    case GK_REDWOOD:
266    case GK_JUNIPER:
267    case GK_CEDAR:
268    case GK_RV730:
269    case GK_RV710:
270    case GK_RS880:
271    case GK_R630:
272    case GK_R600:
273      break;
274    default:
275      llvm_unreachable("Unhandled GPU!");
276    }
277  }
278
279  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
280}
281
282void AMDGPUTargetInfo::fillValidCPUList(
283    SmallVectorImpl<StringRef> &Values) const {
284  if (isAMDGCN(getTriple()))
285    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
286  else
287    llvm::AMDGPU::fillValidArchListR600(Values);
288}
289
290void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
291  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
292}
293
294AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
295                                   const TargetOptions &Opts)
296    : TargetInfo(Triple),
297      GPUKind(isAMDGCN(Triple) ?
298              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
299              llvm::AMDGPU::parseArchR600(Opts.CPU)),
300      GPUFeatures(isAMDGCN(Triple) ?
301                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
302                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
303  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
304                                        : DataLayoutStringR600);
305  assert(DataLayout->getAllocaAddrSpace() == Private);
306  GridValues = llvm::omp::AMDGPUGpuGridValues;
307
308  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
309                     !isAMDGCN(Triple));
310  UseAddrSpaceMapMangling = true;
311
312  HasLegalHalfType = true;
313  HasFloat16 = true;
314
315  // Set pointer width and alignment for target address space 0.
316  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
317  if (getMaxPointerWidth() == 64) {
318    LongWidth = LongAlign = 64;
319    SizeType = UnsignedLong;
320    PtrDiffType = SignedLong;
321    IntPtrType = SignedLong;
322  }
323
324  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
325}
326
327void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
328  TargetInfo::adjust(Opts);
329  // ToDo: There are still a few places using default address space as private
330  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
331  // can be removed from the following line.
332  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
333                     !isAMDGCN(getTriple()));
334}
335
336ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
337  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
338                                             Builtin::FirstTSBuiltin);
339}
340
341void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
342                                        MacroBuilder &Builder) const {
343  Builder.defineMacro("__AMD__");
344  Builder.defineMacro("__AMDGPU__");
345
346  if (isAMDGCN(getTriple()))
347    Builder.defineMacro("__AMDGCN__");
348  else
349    Builder.defineMacro("__R600__");
350
351  if (GPUKind != llvm::AMDGPU::GK_NONE) {
352    StringRef CanonName = isAMDGCN(getTriple()) ?
353      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
354    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
355  }
356
357  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
358  // removed in the near future.
359  if (hasFMAF())
360    Builder.defineMacro("__HAS_FMAF__");
361  if (hasFastFMAF())
362    Builder.defineMacro("FP_FAST_FMAF");
363  if (hasLDEXPF())
364    Builder.defineMacro("__HAS_LDEXPF__");
365  if (hasFP64())
366    Builder.defineMacro("__HAS_FP64__");
367  if (hasFastFMA())
368    Builder.defineMacro("FP_FAST_FMA");
369}
370
371void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
372  assert(HalfFormat == Aux->HalfFormat);
373  assert(FloatFormat == Aux->FloatFormat);
374  assert(DoubleFormat == Aux->DoubleFormat);
375
376  // On x86_64 long double is 80-bit extended precision format, which is
377  // not supported by AMDGPU. 128-bit floating point format is also not
378  // supported by AMDGPU. Therefore keep its own format for these two types.
379  auto SaveLongDoubleFormat = LongDoubleFormat;
380  auto SaveFloat128Format = Float128Format;
381  copyAuxTarget(Aux);
382  LongDoubleFormat = SaveLongDoubleFormat;
383  Float128Format = SaveFloat128Format;
384  // For certain builtin types support on the host target, claim they are
385  // support to pass the compilation of the host code during the device-side
386  // compilation.
387  // FIXME: As the side effect, we also accept `__float128` uses in the device
388  // code. To rejct these builtin types supported in the host target but not in
389  // the device target, one approach would support `device_builtin` attribute
390  // so that we could tell the device builtin types from the host ones. The
391  // also solves the different representations of the same builtin type, such
392  // as `size_t` in the MSVC environment.
393  if (Aux->hasFloat128Type()) {
394    HasFloat128 = true;
395    Float128Format = DoubleFormat;
396  }
397}
398