//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}
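
// For illustration, a sketch of the user-level construct whose execution
// configuration is handled by ActOnCUDAExecConfigExpr above (hypothetical
// user code; `vecAdd` and its arguments are made up):
//
//   __global__ void vecAdd(float *a, float *b, int n);
//   ...
//   vecAdd<<<dim3(64), dim3(256)>>>(a, b, n);
//
// The <<<...>>> arguments become the ExecConfig call built above, i.e. a call
// to the configuration function named by getCudaConfigureFuncName() (e.g.
// cudaConfigureCall or __cudaPushCallConfiguration), marked with
// /*IsExecConfig=*/true.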

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
                                                   CUDATargetContextKind K,
                                                   Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CFT_Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CFT_Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyCUDATarget(FD)) {
    case CFT_HostDevice:
      return CVT_Both;
    case CFT_Device:
    case CFT_Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |
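//
// For illustration, a hedged example of how the (d) rows above play out
// (hypothetical user code; all names are made up):
//
//   __device__ void dev_fn();
//   __host__ void host_fn();
//   __host__ __device__ void hd_fn() {
//     dev_fn();   // SameSide when compiling for device, WrongSide for host.
//     host_fn();  // SameSide when compiling for host, WrongSide for device.
//   }
//
// WrongSide calls are accepted during semantic analysis and only diagnosed
// if the HD caller is actually emitted for the offending side (see
// CheckCUDACall below).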

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedCUDAInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CFT_Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // HipStdPar mode is special, in that assessing whether a device-side call
  // to a host target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST, hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
       CallerTarget == CFT_HostDevice) &&
      CalleeTarget == CFT_Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
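
// For illustration, a hypothetical case that hits the conflict path in
// inferCUDATargetForImplicitSpecialMember below (all class names made up):
//
//   struct HostBase   { __host__ HostBase(); };
//   struct DeviceBase { __device__ DeviceBase(); };
//   struct Derived : HostBase, DeviceBase {};  // implicit default ctor
//
// The implicit Derived::Derived() would have to call a __host__ constructor
// and a __device__ constructor, so the inferred targets conflict and the
// member is marked with CUDAInvalidTargetAttr.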

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CFT_Device)
      NeedsH = false;
    else if (*InferredTarget == CFT_Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union does not have base classes, and a union dtor does not call dtors
  // of its data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
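//
// For illustration only, hypothetical declarations showing how the rules
// below apply (all names are made up):
//
//   struct Empty    { Empty() {} };
//   struct NonEmpty { int x; NonEmpty() : x(1) {} };  // non-empty ctor
//
//   __device__ int a = 42;     // constant initializer: allowed
//   __device__ Empty b;        // empty ctor and dtor: allowed
//   __device__ NonEmpty c;     // dynamic initialization: rejected unless
//                              // -fgpu-allow-device-init is in effect
//   __shared__ int d = 42;     // initializer on a __shared__ variable:
//                              // rejected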
bool HasAllowedCUDADeviceStaticInitializer(Sema &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.Context,
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.Context,
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.LangOpts.GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext()))
    if (FD->isDependentContext())
      return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable.  Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isCUDAImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
      (CallerTarget != CFT_HostDevice ||
       (isCUDAImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
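//
// For illustration (hypothetical declarations, assuming the default
// -fcuda-host-device-constexpr behavior):
//
//   constexpr int twice(int x) { return 2 * x; }  // becomes implicit HD
//
//   __device__ int clamp(int x);
//   constexpr int clamp(int x) { return x < 0 ? 0 : x; }
//     // error: the unattributed constexpr function cannot overload the
//     // __device__ declaration above, unless that declaration comes from a
//     // system header (then the constexpr function is left unattributed).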
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it implicitly host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                       unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(CurFunContext) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext = getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(CurFunContext) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here?  Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error.  Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location.  Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
      << IdentifyCUDATarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, *this)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check for a wrong-sided reference capture in a lambda for CUDA/HIP.
// A lambda may capture a stack variable by reference when it is defined and
// use the capture by reference when the lambda is called. When the capture
// and the use happen on different sides, the capture is invalid and should
// be diagnosed.
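//
// For illustration, the kind of capture this diagnoses (hypothetical user
// code; `launch` and the variable names are made up):
//
//   template <typename F> __global__ void launch(F f) { f(); }
//
//   void host_fn() {
//     int n = 42;
//     launch<<<1, 1>>>([&n] __device__ () { return n; });  // by-reference
//                                          // capture of a host stack
//                                          // variable: diagnosed (deferred)
//     launch<<<1, 1>>>([=] __device__ () { return n; });   // by-value: OK
//   }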
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only if the
  // lambda structure is populated by a device function or kernel and then
  // passed to and called by a host function. However, that is impossible,
  // since a device function or kernel can only call a device function, and a
  // kernel cannot pass a lambda back to a host function since we cannot define
  // a kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // File-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function and then
  // passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed since it may be pointing to
    // managed memory which is accessible on both device and host sides. It
    // only results in an invalid memory access if the this pointer points to
    // memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          *this);
  }
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature.  We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice &&
          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
            isCUDAImplicitHostDeviceFunction(NewFD) &&
            OldTarget == CFT_Device)) ||
         (OldTarget == CFT_HostDevice &&
          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
            isCUDAImplicitHostDeviceFunction(OldFD) &&
            NewTarget == CFT_Device)) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}
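
// For illustration, the kind of redeclaration pair rejected by
// checkCUDATargetOverload above (hypothetical declarations):
//
//   __host__ void f(int);
//   __device__ void f(int);    // OK: host and device overloads may coexist.
//
//   __host__ __device__ void g(int);
//   __device__ void g(int);    // error: an HD function may not overload a
//                              // host or device function with the same
//                              // signature.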

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}