1//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements NVPTX TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTX.h"
14#include "Targets.h"
15#include "clang/Basic/Builtins.h"
16#include "clang/Basic/MacroBuilder.h"
17#include "clang/Basic/TargetBuiltins.h"
18#include "llvm/ADT/StringSwitch.h"
19
20using namespace clang;
21using namespace clang::targets;
22
23static constexpr Builtin::Info BuiltinInfo[] = {
24#define BUILTIN(ID, TYPE, ATTRS)                                               \
25  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
26#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
27  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
28#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
29  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
30#include "clang/Basic/BuiltinsNVPTX.def"
31};
32
// Register-name table handed out by getGCCRegNames(); it holds the single
// entry "r0".
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
34
35NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
36                                 const TargetOptions &Opts,
37                                 unsigned TargetPointerWidth)
38    : TargetInfo(Triple) {
39  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
40         "NVPTX only supports 32- and 64-bit modes.");
41
42  PTXVersion = 32;
43  for (const StringRef Feature : Opts.FeaturesAsWritten) {
44    int PTXV;
45    if (!Feature.startswith("+ptx") ||
46        Feature.drop_front(4).getAsInteger(10, PTXV))
47      continue;
48    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
49  }
50
51  TLSSupported = false;
52  VLASupported = false;
53  AddrSpaceMap = &NVPTXAddrSpaceMap;
54  UseAddrSpaceMapMangling = true;
55  // __bf16 is always available as a load/store only type.
56  BFloat16Width = BFloat16Align = 16;
57  BFloat16Format = &llvm::APFloat::BFloat();
58
59  // Define available target features
60  // These must be defined in sorted order!
61  NoAsmVariants = true;
62  GPU = CudaArch::SM_20;
63
64  if (TargetPointerWidth == 32)
65    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
66  else if (Opts.NVPTXUseShortPointers)
67    resetDataLayout(
68        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
69  else
70    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
71
72  // If possible, get a TargetInfo for our host triple, so we can match its
73  // types.
74  llvm::Triple HostTriple(Opts.HostTriple);
75  if (!HostTriple.isNVPTX())
76    HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
77
78  // If no host target, make some guesses about the data layout and return.
79  if (!HostTarget) {
80    LongWidth = LongAlign = TargetPointerWidth;
81    PointerWidth = PointerAlign = TargetPointerWidth;
82    switch (TargetPointerWidth) {
83    case 32:
84      SizeType = TargetInfo::UnsignedInt;
85      PtrDiffType = TargetInfo::SignedInt;
86      IntPtrType = TargetInfo::SignedInt;
87      break;
88    case 64:
89      SizeType = TargetInfo::UnsignedLong;
90      PtrDiffType = TargetInfo::SignedLong;
91      IntPtrType = TargetInfo::SignedLong;
92      break;
93    default:
94      llvm_unreachable("TargetPointerWidth must be 32 or 64");
95    }
96    return;
97  }
98
99  // Copy properties from host target.
100  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
101  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
102  BoolWidth = HostTarget->getBoolWidth();
103  BoolAlign = HostTarget->getBoolAlign();
104  IntWidth = HostTarget->getIntWidth();
105  IntAlign = HostTarget->getIntAlign();
106  HalfWidth = HostTarget->getHalfWidth();
107  HalfAlign = HostTarget->getHalfAlign();
108  FloatWidth = HostTarget->getFloatWidth();
109  FloatAlign = HostTarget->getFloatAlign();
110  DoubleWidth = HostTarget->getDoubleWidth();
111  DoubleAlign = HostTarget->getDoubleAlign();
112  LongWidth = HostTarget->getLongWidth();
113  LongAlign = HostTarget->getLongAlign();
114  LongLongWidth = HostTarget->getLongLongWidth();
115  LongLongAlign = HostTarget->getLongLongAlign();
116  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0);
117  NewAlign = HostTarget->getNewAlign();
118  DefaultAlignForAttributeAligned =
119      HostTarget->getDefaultAlignForAttributeAligned();
120  SizeType = HostTarget->getSizeType();
121  IntMaxType = HostTarget->getIntMaxType();
122  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
123  IntPtrType = HostTarget->getIntPtrType();
124  WCharType = HostTarget->getWCharType();
125  WIntType = HostTarget->getWIntType();
126  Char16Type = HostTarget->getChar16Type();
127  Char32Type = HostTarget->getChar32Type();
128  Int64Type = HostTarget->getInt64Type();
129  SigAtomicType = HostTarget->getSigAtomicType();
130  ProcessIDType = HostTarget->getProcessIDType();
131
132  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
133  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
134  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
135  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
136
137  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
138  // we need those macros to be identical on host and device, because (among
139  // other things) they affect which standard library classes are defined, and
140  // we need all classes to be defined on both the host and device.
141  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();
142
143  // Properties intentionally not copied from host:
144  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
145  //   host/device boundary.
146  // - SuitableAlign: Not visible across the host/device boundary, and may
147  //   correctly be different on host/device, e.g. if host has wider vector
148  //   types than device.
149  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
150  //   as its double type, but that's not necessarily true on the host.
151  //   TODO: nvcc emits a warning when using long double on device; we should
152  //   do the same.
153}
154
155ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
156  return llvm::ArrayRef(GCCRegNames);
157}
158
159bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
160  return llvm::StringSwitch<bool>(Feature)
161      .Cases("ptx", "nvptx", true)
162      .Default(false);
163}
164
165void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
166                                       MacroBuilder &Builder) const {
167  Builder.defineMacro("__PTX__");
168  Builder.defineMacro("__NVPTX__");
169  if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
170    // Set __CUDA_ARCH__ for the GPU specified.
171    std::string CUDAArchCode = [this] {
172      switch (GPU) {
173      case CudaArch::GFX600:
174      case CudaArch::GFX601:
175      case CudaArch::GFX602:
176      case CudaArch::GFX700:
177      case CudaArch::GFX701:
178      case CudaArch::GFX702:
179      case CudaArch::GFX703:
180      case CudaArch::GFX704:
181      case CudaArch::GFX705:
182      case CudaArch::GFX801:
183      case CudaArch::GFX802:
184      case CudaArch::GFX803:
185      case CudaArch::GFX805:
186      case CudaArch::GFX810:
187      case CudaArch::GFX900:
188      case CudaArch::GFX902:
189      case CudaArch::GFX904:
190      case CudaArch::GFX906:
191      case CudaArch::GFX908:
192      case CudaArch::GFX909:
193      case CudaArch::GFX90a:
194      case CudaArch::GFX90c:
195      case CudaArch::GFX940:
196      case CudaArch::GFX1010:
197      case CudaArch::GFX1011:
198      case CudaArch::GFX1012:
199      case CudaArch::GFX1013:
200      case CudaArch::GFX1030:
201      case CudaArch::GFX1031:
202      case CudaArch::GFX1032:
203      case CudaArch::GFX1033:
204      case CudaArch::GFX1034:
205      case CudaArch::GFX1035:
206      case CudaArch::GFX1036:
207      case CudaArch::GFX1100:
208      case CudaArch::GFX1101:
209      case CudaArch::GFX1102:
210      case CudaArch::GFX1103:
211      case CudaArch::Generic:
212      case CudaArch::LAST:
213        break;
214      case CudaArch::UNUSED:
215      case CudaArch::UNKNOWN:
216        assert(false && "No GPU arch when compiling CUDA device code.");
217        return "";
218      case CudaArch::SM_20:
219        return "200";
220      case CudaArch::SM_21:
221        return "210";
222      case CudaArch::SM_30:
223        return "300";
224      case CudaArch::SM_32:
225        return "320";
226      case CudaArch::SM_35:
227        return "350";
228      case CudaArch::SM_37:
229        return "370";
230      case CudaArch::SM_50:
231        return "500";
232      case CudaArch::SM_52:
233        return "520";
234      case CudaArch::SM_53:
235        return "530";
236      case CudaArch::SM_60:
237        return "600";
238      case CudaArch::SM_61:
239        return "610";
240      case CudaArch::SM_62:
241        return "620";
242      case CudaArch::SM_70:
243        return "700";
244      case CudaArch::SM_72:
245        return "720";
246      case CudaArch::SM_75:
247        return "750";
248      case CudaArch::SM_80:
249        return "800";
250      case CudaArch::SM_86:
251        return "860";
252      case CudaArch::SM_87:
253        return "870";
254      case CudaArch::SM_89:
255        return "890";
256      case CudaArch::SM_90:
257        return "900";
258      }
259      llvm_unreachable("unhandled CudaArch");
260    }();
261    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
262  }
263}
264
265ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
266  return llvm::ArrayRef(BuiltinInfo,
267                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
268}
269