//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/CodeGenOptions.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/IR/DataLayout.h"
21
22using namespace clang;
23using namespace clang::targets;
24
25namespace clang {
26namespace targets {
27
28// If you edit the description strings, make sure you update
29// getPointerWidthV().
30
// Data layout description installed by the AMDGPUTargetInfo constructor when
// the triple is not amdgcn. The literal is split by topic (pointers, integer
// alignment, vector alignments, native widths / stack / alloca address space);
// the pieces concatenate to a single string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
    "-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
34
// Data layout description installed by the AMDGPUTargetInfo constructor when
// the triple is amdgcn. The literal is split by topic (default pointer,
// per-address-space pointers, integer alignment, vector alignments, native
// widths / stack / alloca address space, non-integral address spaces); the
// pieces concatenate to a single string.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
    "-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
40
41const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42    Generic,  // Default
43    Global,   // opencl_global
44    Local,    // opencl_local
45    Constant, // opencl_constant
46    Private,  // opencl_private
47    Generic,  // opencl_generic
48    Global,   // cuda_device
49    Constant, // cuda_constant
50    Local,    // cuda_shared
51    Generic,  // ptr32_sptr
52    Generic,  // ptr32_uptr
53    Generic   // ptr64
54};
55
56const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
57    Private,  // Default
58    Global,   // opencl_global
59    Local,    // opencl_local
60    Constant, // opencl_constant
61    Private,  // opencl_private
62    Generic,  // opencl_generic
63    Global,   // cuda_device
64    Constant, // cuda_constant
65    Local,    // cuda_shared
66    Generic,  // ptr32_sptr
67    Generic,  // ptr32_uptr
68    Generic   // ptr64
69
70};
71} // namespace targets
72} // namespace clang
73
74const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
75#define BUILTIN(ID, TYPE, ATTRS)                                               \
76  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
77#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
78  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
79#include "clang/Basic/BuiltinsAMDGPU.def"
80};
81
82const char *const AMDGPUTargetInfo::GCCRegNames[] = {
83  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
84  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
85  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
86  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
87  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
88  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
89  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
90  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
91  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
92  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
93  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
94  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
95  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
96  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
97  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
98  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
99  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
100  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
101  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
102  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
103  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
104  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
105  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
106  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
107  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
108  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
109  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
110  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
111  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
112  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
113  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
114  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
115  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
116  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
117  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
118  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
119  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
120  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
121  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
122  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
123  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
124  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
125  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
126  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
127  "flat_scratch_lo", "flat_scratch_hi"
128};
129
130ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
131  return llvm::makeArrayRef(GCCRegNames);
132}
133
134bool AMDGPUTargetInfo::initFeatureMap(
135    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
136    const std::vector<std::string> &FeatureVec) const {
137
138  using namespace llvm::AMDGPU;
139
140  // XXX - What does the member GPU mean if device name string passed here?
141  if (isAMDGCN(getTriple())) {
142    switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
143    case GK_GFX1012:
144    case GK_GFX1011:
145      Features["dot1-insts"] = true;
146      Features["dot2-insts"] = true;
147      Features["dot5-insts"] = true;
148      Features["dot6-insts"] = true;
149      LLVM_FALLTHROUGH;
150    case GK_GFX1010:
151      Features["dl-insts"] = true;
152      Features["ci-insts"] = true;
153      Features["flat-address-space"] = true;
154      Features["16-bit-insts"] = true;
155      Features["dpp"] = true;
156      Features["gfx8-insts"] = true;
157      Features["gfx9-insts"] = true;
158      Features["gfx10-insts"] = true;
159      Features["s-memrealtime"] = true;
160      break;
161    case GK_GFX908:
162      Features["dot3-insts"] = true;
163      Features["dot4-insts"] = true;
164      Features["dot5-insts"] = true;
165      Features["dot6-insts"] = true;
166      LLVM_FALLTHROUGH;
167    case GK_GFX906:
168      Features["dl-insts"] = true;
169      Features["dot1-insts"] = true;
170      Features["dot2-insts"] = true;
171      LLVM_FALLTHROUGH;
172    case GK_GFX909:
173    case GK_GFX904:
174    case GK_GFX902:
175    case GK_GFX900:
176      Features["gfx9-insts"] = true;
177      LLVM_FALLTHROUGH;
178    case GK_GFX810:
179    case GK_GFX803:
180    case GK_GFX802:
181    case GK_GFX801:
182      Features["gfx8-insts"] = true;
183      Features["16-bit-insts"] = true;
184      Features["dpp"] = true;
185      Features["s-memrealtime"] = true;
186      LLVM_FALLTHROUGH;
187    case GK_GFX704:
188    case GK_GFX703:
189    case GK_GFX702:
190    case GK_GFX701:
191    case GK_GFX700:
192      Features["ci-insts"] = true;
193      Features["flat-address-space"] = true;
194      LLVM_FALLTHROUGH;
195    case GK_GFX601:
196    case GK_GFX600:
197      break;
198    case GK_NONE:
199      break;
200    default:
201      llvm_unreachable("Unhandled GPU!");
202    }
203  } else {
204    if (CPU.empty())
205      CPU = "r600";
206
207    switch (llvm::AMDGPU::parseArchR600(CPU)) {
208    case GK_CAYMAN:
209    case GK_CYPRESS:
210    case GK_RV770:
211    case GK_RV670:
212      // TODO: Add fp64 when implemented.
213      break;
214    case GK_TURKS:
215    case GK_CAICOS:
216    case GK_BARTS:
217    case GK_SUMO:
218    case GK_REDWOOD:
219    case GK_JUNIPER:
220    case GK_CEDAR:
221    case GK_RV730:
222    case GK_RV710:
223    case GK_RS880:
224    case GK_R630:
225    case GK_R600:
226      break;
227    default:
228      llvm_unreachable("Unhandled GPU!");
229    }
230  }
231
232  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
233}
234
235void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
236                                           TargetOptions &TargetOpts) const {
237  bool hasFP32Denormals = false;
238  bool hasFP64Denormals = false;
239
240  for (auto &I : TargetOpts.FeaturesAsWritten) {
241    if (I == "+fp32-denormals" || I == "-fp32-denormals")
242      hasFP32Denormals = true;
243    if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
244      hasFP64Denormals = true;
245  }
246  if (!hasFP32Denormals)
247    TargetOpts.Features.push_back(
248      (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
249             ? '+' : '-') + Twine("fp32-denormals"))
250            .str());
251  // Always do not flush fp64 or fp16 denorms.
252  if (!hasFP64Denormals && hasFP64())
253    TargetOpts.Features.push_back("+fp64-fp16-denormals");
254}
255
256void AMDGPUTargetInfo::fillValidCPUList(
257    SmallVectorImpl<StringRef> &Values) const {
258  if (isAMDGCN(getTriple()))
259    llvm::AMDGPU::fillValidArchListAMDGCN(Values);
260  else
261    llvm::AMDGPU::fillValidArchListR600(Values);
262}
263
264void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
265  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
266}
267
268AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269                                   const TargetOptions &Opts)
270    : TargetInfo(Triple),
271      GPUKind(isAMDGCN(Triple) ?
272              llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
273              llvm::AMDGPU::parseArchR600(Opts.CPU)),
274      GPUFeatures(isAMDGCN(Triple) ?
275                  llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
276                  llvm::AMDGPU::getArchAttrR600(GPUKind)) {
277  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
278                                        : DataLayoutStringR600);
279  assert(DataLayout->getAllocaAddrSpace() == Private);
280
281  setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
282                     !isAMDGCN(Triple));
283  UseAddrSpaceMapMangling = true;
284
285  HasLegalHalfType = true;
286  HasFloat16 = true;
287
288  // Set pointer width and alignment for target address space 0.
289  PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
290  if (getMaxPointerWidth() == 64) {
291    LongWidth = LongAlign = 64;
292    SizeType = UnsignedLong;
293    PtrDiffType = SignedLong;
294    IntPtrType = SignedLong;
295  }
296
297  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
298}
299
300void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
301  TargetInfo::adjust(Opts);
302  // ToDo: There are still a few places using default address space as private
303  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
304  // can be removed from the following line.
305  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
306                     !isAMDGCN(getTriple()));
307}
308
309ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
310  return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
311                                             Builtin::FirstTSBuiltin);
312}
313
314void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
315                                        MacroBuilder &Builder) const {
316  Builder.defineMacro("__AMD__");
317  Builder.defineMacro("__AMDGPU__");
318
319  if (isAMDGCN(getTriple()))
320    Builder.defineMacro("__AMDGCN__");
321  else
322    Builder.defineMacro("__R600__");
323
324  if (GPUKind != llvm::AMDGPU::GK_NONE) {
325    StringRef CanonName = isAMDGCN(getTriple()) ?
326      getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
327    Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
328  }
329
330  // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
331  // removed in the near future.
332  if (hasFMAF())
333    Builder.defineMacro("__HAS_FMAF__");
334  if (hasFastFMAF())
335    Builder.defineMacro("FP_FAST_FMAF");
336  if (hasLDEXPF())
337    Builder.defineMacro("__HAS_LDEXPF__");
338  if (hasFP64())
339    Builder.defineMacro("__HAS_FP64__");
340  if (hasFastFMA())
341    Builder.defineMacro("FP_FAST_FMA");
342}
343
344void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
345  assert(HalfFormat == Aux->HalfFormat);
346  assert(FloatFormat == Aux->FloatFormat);
347  assert(DoubleFormat == Aux->DoubleFormat);
348
349  // On x86_64 long double is 80-bit extended precision format, which is
350  // not supported by AMDGPU. 128-bit floating point format is also not
351  // supported by AMDGPU. Therefore keep its own format for these two types.
352  auto SaveLongDoubleFormat = LongDoubleFormat;
353  auto SaveFloat128Format = Float128Format;
354  copyAuxTarget(Aux);
355  LongDoubleFormat = SaveLongDoubleFormat;
356  Float128Format = SaveFloat128Format;
357}
358