1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "clang/AST/Attr.h"
19#include "clang/AST/Decl.h"
20#include "clang/AST/OpenMPClause.h"
21#include "clang/AST/StmtOpenMP.h"
22#include "clang/AST/StmtVisitor.h"
23#include "clang/Basic/BitmaskEnum.h"
24#include "clang/CodeGen/ConstantInitBuilder.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/SetOperations.h"
27#include "llvm/Bitcode/BitcodeReader.h"
28#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29#include "llvm/IR/DerivedTypes.h"
30#include "llvm/IR/GlobalValue.h"
31#include "llvm/IR/Value.h"
32#include "llvm/Support/Format.h"
33#include "llvm/Support/raw_ostream.h"
34#include <cassert>
35
36using namespace clang;
37using namespace CodeGen;
38using namespace llvm::omp;
39
40namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Installed as the CodeGenFunction's CGCapturedStmtInfo while an OpenMP
/// region body is being emitted; subclasses customize thread-id access and
/// helper naming per region kind.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for an outlined region backed by a captured statement.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no captured statement (used for inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default,
  /// overridden for task regions and forwarded by inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Return the kind of this OpenMP region.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Return the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if this region's directive may be exited via 'cancel'.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
102
/// API for captured statement code generation in OpenMP constructs.
/// Region info for a standalone 'parallel' (or similar) outlined function.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null.
  /// \param HelperName Name used for the generated outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name for the generated outlined helper function.
  StringRef HelperName;
};
135
/// API for captured statement code generation in OpenMP constructs.
/// Region info for a standalone 'task' outlined function; handles the
/// part-id-based switching protocol required by untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the re-entry switch for untied tasks: on
  /// entry, a switch over the stored part id jumps to the point where the
  /// task previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable (passed by pointer) holding the current task part id.
    const VarDecl *PartIDVar;
    /// Extra code emitted at each task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unknown part ids fall through to an immediate return.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a task-switching point: store the next part id, run the
    /// user-supplied untied codegen, return from the function, and register
    /// the continuation block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Next part id == current number of switch cases.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
224
/// API for inlined captured statement code generation in OpenMP
/// constructs. Wraps the previously-installed CGCapturedStmtInfo and
/// delegates most queries to the enclosing OpenMP region, if any.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The CapturedStmtInfo that was active before this inlined
  /// region; may be null or a non-OpenMP info.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// Note: intentionally consults the raw OldCSI (not just OuterRegionInfo),
  /// so a non-OpenMP enclosing captured region can still supply a name.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
307
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided unique name for the target region.
  StringRef HelperName;
};
336
/// Placeholder RegionCodeGenTy callback for regions that only capture
/// expressions and therefore must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region. Privatizes globals captured by the statement so expression
/// emission sees local copies; never emits a statement body.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a reference to the variable and use its emitted address as the
      // private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
399
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on construction and saves/clears the lambda and
/// block capture state so inlined region emission does not see them; the
/// destructor restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
436
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
465
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Sentinel device IDs reserved by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
491
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
532
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
564
/// Identifiers of the OpenMP runtime library (libomp/libomptarget) entry
/// points emitted by this file; each enumerator's comment documents the
/// C signature of the corresponding runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};
763
764/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
765/// region.
766class CleanupTy final : public EHScopeStack::Cleanup {
767  PrePostActionTy *Action;
768
769public:
770  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
771  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
772    if (!CGF.HaveInsertPoint())
773      return;
774    Action->Exit(CGF);
775  }
776};
777
778} // anonymous namespace
779
780void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
781  CodeGenFunction::RunCleanupsScope Scope(CGF);
782  if (PrePostAction) {
783    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
784    Callback(CodeGen, CGF, *PrePostAction);
785  } else {
786    PrePostActionTy Action;
787    Callback(CodeGen, CGF, Action);
788  }
789}
790
791/// Check if the combiner is a call to UDR combiner and if it is so return the
792/// UDR decl used for reduction.
793static const OMPDeclareReductionDecl *
794getReductionInit(const Expr *ReductionOp) {
795  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
796    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
797      if (const auto *DRE =
798              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
799        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
800          return DRD;
801  return nullptr;
802}
803
/// Initialize the private reduction copy at \p Private from the original item
/// at \p Original. If \p DRD declares an initializer, the initializer call
/// expression \p InitOp is evaluated with its two referenced variables mapped
/// onto the private/original addresses; otherwise the private copy is filled
/// from a zero-initialized constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The init op is modeled as a call through an opaque callee whose two
    // arguments take the address of the variables to initialize. Dig out the
    // referenced decls so they can be privatized onto our addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // First argument's variable gets the private copy, second the original.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is the initializer function (see UDRMap population in
    // emitUserDefinedReduction); use it as the actual callee.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: materialize a private-linkage constant global with
    // the zero pattern for Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant in a form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the loaded value through an opaque expression so the generic
    // expression-to-memory path can store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
855
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, \p Init is a UDR reduction op and
/// each element is initialized via emitInitWithReductionInitializer;
/// otherwise \p Init is an ordinary initializer expression.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction decl for the item, if any (enables tracking
/// the source array in lock step with the destination).
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source pointer is only tracked when a UDR initializer may reference
  // the original element.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per-element cleanups so temporaries are destroyed each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name says "dest" but this is the *source*
    // pointer increment — presumably just a misleading IR value name; confirm
    // before renaming, since FileCheck tests may match on it.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
944
945LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
946  return CGF.EmitOMPSharedLValue(E);
947}
948
949LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
950                                            const Expr *E) {
951  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
952    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
953  return LValue();
954}
955
956void ReductionCodeGen::emitAggregateInitialization(
957    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
958    const OMPDeclareReductionDecl *DRD) {
959  // Emit VarDecl with copy init for arrays.
960  // Get the address of the original variable captured in current
961  // captured region.
962  const auto *PrivateVD =
963      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
964  bool EmitDeclareReductionInit =
965      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
966  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
967                       EmitDeclareReductionInit,
968                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
969                                                : PrivateVD->getInit(),
970                       DRD, SharedLVal.getAddress(CGF));
971}
972
973ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
974                                   ArrayRef<const Expr *> Privates,
975                                   ArrayRef<const Expr *> ReductionOps) {
976  ClausesData.reserve(Shareds.size());
977  SharedAddresses.reserve(Shareds.size());
978  Sizes.reserve(Shareds.size());
979  BaseDecls.reserve(Shareds.size());
980  auto IPriv = Privates.begin();
981  auto IRed = ReductionOps.begin();
982  for (const Expr *Ref : Shareds) {
983    ClausesData.emplace_back(Ref, *IPriv, *IRed);
984    std::advance(IPriv, 1);
985    std::advance(IRed, 1);
986  }
987}
988
989void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
990  assert(SharedAddresses.size() == N &&
991         "Number of generated lvalues must be exactly N.");
992  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
993  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
994  SharedAddresses.emplace_back(First, Second);
995}
996
/// Compute and record (in Sizes[N]) the size of the N-th reduction item.
/// Fixed-size items store only the byte size; variably modified private
/// types additionally compute a runtime element count and bind it to the
/// VLA size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: byte size only, no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element size is taken from the pointee type of the shared address.
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (last - first) + 1, from the section's bound lvalues.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: byte size is known; derive the element count from it.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the element count to the VLA size expression and emit the variably
  // modified private type with it.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1034
1035void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1036                                         llvm::Value *Size) {
1037  const auto *PrivateVD =
1038      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1039  QualType PrivateType = PrivateVD->getType();
1040  if (!PrivateType->isVariablyModifiedType()) {
1041    assert(!Size && !Sizes[N].second &&
1042           "Size should be nullptr for non-variably modified reduction "
1043           "items.");
1044    return;
1045  }
1046  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1047      CGF,
1048      cast<OpaqueValueExpr>(
1049          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1050      RValue::get(Size));
1051  CGF.EmitVariablyModifiedType(PrivateType);
1052}
1053
/// Emit initialization of the N-th private reduction copy.
/// \param PrivateAddr Raw address of the private copy; recast here to the
/// private type's memory representation.
/// \param SharedLVal Lvalue of the original shared item.
/// \param DefaultInit Callback given first chance to initialize; if it
/// returns true, the remaining initialization steps are skipped.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to their memory types before emitting stores.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item with a user-defined reduction initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // the DefaultInit callback did not handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1084
1085bool ReductionCodeGen::needCleanups(unsigned N) {
1086  const auto *PrivateVD =
1087      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1088  QualType PrivateType = PrivateVD->getType();
1089  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1090  return DTorKind != QualType::DK_none;
1091}
1092
1093void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1094                                    Address PrivateAddr) {
1095  const auto *PrivateVD =
1096      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1097  QualType PrivateType = PrivateVD->getType();
1098  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1099  if (needCleanups(N)) {
1100    PrivateAddr = CGF.Builder.CreateElementBitCast(
1101        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1102    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1103  }
1104}
1105
/// Dereference pointer/reference layers of \p BaseLV (typed \p BaseTy) until
/// the element type \p ElTy is reached, then recast the resulting address to
/// ElTy's memory representation, preserving base info and TBAA.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Peel one level of indirection per iteration until the type matches ElTy.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference type: load through it as a reference lvalue.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1125
/// Rebuild the pointer/reference indirection chain of \p BaseTy around the
/// adjusted pointer \p Addr: one memory temporary is created per indirection
/// level, each storing the address of the next, with \p Addr stored into the
/// innermost one. Returns the outermost temporary, or \p Addr directly (at
/// \p BaseLVAlignment) when no indirection levels exist.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Tmp: innermost temp created so far; TopTmp: previous temp to link into;
  // MostTopTmp: head of the chain (the eventual return value).
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to this one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  // Cast Addr to the innermost temp's element type (or the original base
  // lvalue type when no temps were made).
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1153
1154static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1155  const VarDecl *OrigVD = nullptr;
1156  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1157    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1158    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1159      Base = TempOASE->getBase()->IgnoreParenImpCasts();
1160    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1161      Base = TempASE->getBase()->IgnoreParenImpCasts();
1162    DE = cast<DeclRefExpr>(Base);
1163    OrigVD = cast<VarDecl>(DE->getDecl());
1164  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1165    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1166    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1167      Base = TempASE->getBase()->IgnoreParenImpCasts();
1168    DE = cast<DeclRefExpr>(Base);
1169    OrigVD = cast<VarDecl>(DE->getDecl());
1170  }
1171  return OrigVD;
1172}
1173
/// Adjust \p PrivateAddr for the N-th item so it points at the position
/// corresponding to the base variable, mirroring the offset between the
/// shared item's first element and its base. Also records the item's base
/// declaration in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // Array section/subscript: the shared address may be offset from the
    // base variable; reproduce the same offset for the private copy.
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) between the base and the first shared element.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer in the base's indirection structure.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1199
1200bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1201  const OMPDeclareReductionDecl *DRD =
1202      getReductionInit(ClausesData[N].ReductionOp);
1203  return DRD && DRD->getInitializer();
1204}
1205
1206LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1207  return CGF.EmitLoadOfPointerLValue(
1208      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1209      getThreadIDVariable()->getType()->castAs<PointerType>());
1210}
1211
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Emitting under a terminate scope enforces this: an exception escaping
  // the region terminates rather than unwinding out of it.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1224
1225LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1226    CodeGenFunction &CGF) {
1227  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1228                            getThreadIDVariable()->getType(),
1229                            AlignmentSource::Decl);
1230}
1231
1232static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1233                                       QualType FieldTy) {
1234  auto *Field = FieldDecl::Create(
1235      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1236      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1237      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238  Field->setAccess(AS_public);
1239  DC->addDecl(Field);
1240  return Field;
1241}
1242
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the implicit ident_t record used by the OpenMP runtime: four
  // signed 32-bit fields followed by a void* (source-location string).
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Critical-section names are modeled as arrays of 8 i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Pick up offloading entries recorded in module metadata, if any.
  loadOffloadInfoMetadata();
}
1268
/// Try to emit an alias from the original function (\p OldGD) to its variant
/// (\p NewGD). Returns true if the alias was created; false when the variant
/// has no definition yet, in which case the caller must retry later or emit
/// the original declaration.
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  // An alias can only be emitted once the aliasee has a real definition.
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value.  This ensures that it is emitted
    // if a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      // The original function was already emitted as a declaration: steal its
      // name for the alias, retarget all uses, and drop the stale decl.
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}
1314
/// Release per-module OpenMP codegen state: drop unused non-target variable
/// declarations and retry any deferred declare-variant aliases.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep actual definitions and anything still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
  // Emit aliases for the deferred aliasees.
  for (const auto &Pair : DeferredVariantFunction) {
    StringRef MangledName = CGM.getMangledName(Pair.second.second);
    llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
    // If not able to emit alias, just emit original declaration.
    (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
                                /*IsForDefinition=*/false);
  }
}
1337
1338std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1339  SmallString<128> Buffer;
1340  llvm::raw_svector_ostream OS(Buffer);
1341  StringRef Sep = FirstSeparator;
1342  for (StringRef Part : Parts) {
1343    OS << Sep << Part;
1344    Sep = Separator;
1345  }
1346  return OS.str();
1347}
1348
/// Emit the outlined helper for a user-defined reduction: either the combiner
/// (\p IsCombiner) or the initializer. The helper has the shape
/// void fn(Ty *out, Ty *in); \p In and \p Out are mapped onto the
/// dereferenced parameters, and \p CombinerInitializer (if non-null) is
/// emitted as the body expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Force these small helpers to be inlined when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a call-style expression, emit the out variable's
  // own non-trivial initializer into the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1405
/// Emit (and cache in UDRMap) the combiner and optional initializer functions
/// for the declare-reduction decl \p D. When \p CGF is provided, the decl is
/// also recorded against the current function in FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Already emitted for this module.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers pass the init expression through; for
    // other kinds the priv variable's own initializer is used inside
    // emitCombinerOrInitializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1431
1432std::pair<llvm::Function *, llvm::Function *>
1433CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1434  auto I = UDRMap.find(D);
1435  if (I != UDRMap.end())
1436    return I->second;
1437  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1438  return UDRMap.lookup(D);
1439}
1440
1441namespace {
1442// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1443// Builder if one is present.
1444struct PushAndPopStackRAII {
1445  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1446                      bool HasCancel)
1447      : OMPBuilder(OMPBuilder) {
1448    if (!OMPBuilder)
1449      return;
1450
1451    // The following callback is the crucial part of clangs cleanup process.
1452    //
1453    // NOTE:
1454    // Once the OpenMPIRBuilder is used to create parallel regions (and
1455    // similar), the cancellation destination (Dest below) is determined via
1456    // IP. That means if we have variables to finalize we split the block at IP,
1457    // use the new block (=BB) as destination to build a JumpDest (via
1458    // getJumpDestInCurrentScope(BB)) which then is fed to
1459    // EmitBranchThroughCleanup. Furthermore, there will not be the need
1460    // to push & pop an FinalizationInfo object.
1461    // The FiniCB will still be needed but at the point where the
1462    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1463    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1464      assert(IP.getBlock()->end() == IP.getPoint() &&
1465             "Clang CG should cause non-terminated block!");
1466      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1467      CGF.Builder.restoreIP(IP);
1468      CodeGenFunction::JumpDest Dest =
1469          CGF.getOMPCancelDestination(OMPD_parallel);
1470      CGF.EmitBranchThroughCleanup(Dest);
1471    };
1472
1473    // TODO: Remove this once we emit parallel regions through the
1474    //       OpenMPIRBuilder as it can do this setup internally.
1475    llvm::OpenMPIRBuilder::FinalizationInfo FI(
1476        {FiniCB, OMPD_parallel, HasCancel});
1477    OMPBuilder->pushFinalizationCB(std::move(FI));
1478  }
1479  ~PushAndPopStackRAII() {
1480    if (OMPBuilder)
1481      OMPBuilder->popFinalizationCB();
1482  }
1483  llvm::OpenMPIRBuilder *OMPBuilder;
1484};
1485} // namespace
1486
1487static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1488    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1489    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1490    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1491  assert(ThreadIDVar->getType()->isPointerType() &&
1492         "thread id variable must be of type kmp_int32 *");
1493  CodeGenFunction CGF(CGM, true);
1494  bool HasCancel = false;
1495  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1496    HasCancel = OPD->hasCancel();
1497  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1498    HasCancel = OPSD->hasCancel();
1499  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1500    HasCancel = OPFD->hasCancel();
1501  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1502    HasCancel = OPFD->hasCancel();
1503  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1504    HasCancel = OPFD->hasCancel();
1505  else if (const auto *OPFD =
1506               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1507    HasCancel = OPFD->hasCancel();
1508  else if (const auto *OPFD =
1509               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1510    HasCancel = OPFD->hasCancel();
1511
1512  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1513  //       parallel region to make cancellation barriers work properly.
1514  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1515  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1516  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1517                                    HasCancel, OutlinedHelperName);
1518  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1519  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1520}
1521
1522llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1523    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1524    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1525  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1526  return emitParallelOrTeamsOutlinedFunction(
1527      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1528}
1529
1530llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1531    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1532    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1533  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1534  return emitParallelOrTeamsOutlinedFunction(
1535      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1536}
1537
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits the outlined function for a task region.  For untied tasks, the
  // action below emits a call to __kmpc_omp_task with the task descriptor so
  // the runtime can re-schedule the remaining parts of the task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    // Arguments: location, global thread id, and the kmp_task_t* loaded from
    // the local TaskTVar slot.
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // The action must be attached to CodeGen before the captured statement is
  // emitted so the untied handling participates in region codegen.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-style directives capture their statement under OMPD_taskloop;
  // all other task-generating directives use OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // hasCancel is only queried for plain 'task' directives; every other kind
  // is treated as not cancellable here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // For untied tasks report back how many resumable parts were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1574
1575static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1576                             const RecordDecl *RD, const CGRecordLayout &RL,
1577                             ArrayRef<llvm::Constant *> Data) {
1578  llvm::StructType *StructTy = RL.getLLVMType();
1579  unsigned PrevIdx = 0;
1580  ConstantInitBuilder CIBuilder(CGM);
1581  auto DI = Data.begin();
1582  for (const FieldDecl *FD : RD->fields()) {
1583    unsigned Idx = RL.getLLVMFieldNo(FD);
1584    // Fill the alignment.
1585    for (unsigned I = PrevIdx; I < Idx; ++I)
1586      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1587    PrevIdx = Idx + 1;
1588    Fields.add(*DI);
1589    ++DI;
1590  }
1591}
1592
1593template <class... As>
1594static llvm::GlobalVariable *
1595createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1596                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
1597                   As &&... Args) {
1598  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600  ConstantInitBuilder CIBuilder(CGM);
1601  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1602  buildStructValue(Fields, CGM, RD, RL, Data);
1603  return Fields.finishAndCreateGlobal(
1604      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1605      std::forward<As>(Args)...);
1606}
1607
1608template <typename T>
1609static void
1610createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1611                                         ArrayRef<llvm::Constant *> Data,
1612                                         T &Parent) {
1613  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1614  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1615  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1616  buildStructValue(Fields, CGM, RD, RL, Data);
1617  Fields.finishAndAddTo(Parent);
1618}
1619
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  // Returns (creating on first use) a private global ident_t object for the
  // given flag combination.  Results are cached in OpenMPDefaultLocMap keyed
  // by (Flags, Reserved2Flags).
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field initializers for the ident_t record, ending with the psource
    // string pointer; the order is assumed to match IdentQTy's field order.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The address is not significant, so identical globals may be merged.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1652
1653void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1654                                             bool AtCurrentPoint) {
1655  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1656  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1657
1658  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1659  if (AtCurrentPoint) {
1660    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1661        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1662  } else {
1663    Elem.second.ServiceInsertPt =
1664        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1665    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1666  }
1667}
1668
1669void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1670  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1671  if (Elem.second.ServiceInsertPt) {
1672    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1673    Elem.second.ServiceInsertPt = nullptr;
1674    Ptr->eraseFromParent();
1675  }
1676}
1677
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Returns an ident_t* describing \p Loc for passing to libomp entry points.
  // Without debug info (or with an invalid location) a shared default object
  // is returned; otherwise a function-local ident_t copy is kept and its
  // psource field is updated with a ";file;function;line;column;;" string.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the local temporary at the service
    // insert point so it is initialized before any use in this function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The debug-location strings are cached per raw source location.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1738
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Returns the OpenMP global thread id for the current function, trying in
  // order: the per-function cache, the thread-id parameter of an enclosing
  // outlined region, and finally a call to __kmpc_global_thread_num emitted
  // at the function's service insert point.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The parameter is safe to load when landing pads are not required, or
      // when C++ exceptions are off, or when the load happens in the entry
      // block, or when the pointer itself is not an instruction, or when the
      // pointer's defining instruction is in the entry block or the current
      // insertion block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1795
1796void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1797  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1798  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1799    clearLocThreadIdInsertPt(CGF);
1800    OpenMPLocThreadIDMap.erase(CGF.CurFn);
1801  }
1802  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1803    for(auto *D : FunctionUDRMap[CGF.CurFn])
1804      UDRMap.erase(D);
1805    FunctionUDRMap.erase(CGF.CurFn);
1806  }
1807  auto I = FunctionUDMMap.find(CGF.CurFn);
1808  if (I != FunctionUDMMap.end()) {
1809    for(auto *D : I->second)
1810      UDMMap.erase(D);
1811    FunctionUDMMap.erase(I);
1812  }
1813}
1814
1815llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1816  return IdentTy->getPointerTo();
1817}
1818
1819llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1820  if (!Kmpc_MicroTy) {
1821    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1822    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1823                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1824    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1825  }
1826  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1827}
1828
1829llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1830  llvm::FunctionCallee RTLFn = nullptr;
1831  switch (static_cast<OpenMPRTLFunction>(Function)) {
1832  case OMPRTL__kmpc_fork_call: {
1833    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1834    // microtask, ...);
1835    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1836                                getKmpc_MicroPointerTy()};
1837    auto *FnTy =
1838        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1839    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1840    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1841      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1842        llvm::LLVMContext &Ctx = F->getContext();
1843        llvm::MDBuilder MDB(Ctx);
1844        // Annotate the callback behavior of the __kmpc_fork_call:
1845        //  - The callback callee is argument number 2 (microtask).
1846        //  - The first two arguments of the callback callee are unknown (-1).
1847        //  - All variadic arguments to the __kmpc_fork_call are passed to the
1848        //    callback callee.
1849        F->addMetadata(
1850            llvm::LLVMContext::MD_callback,
1851            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1852                                        2, {-1, -1},
1853                                        /* VarArgsArePassed */ true)}));
1854      }
1855    }
1856    break;
1857  }
1858  case OMPRTL__kmpc_global_thread_num: {
1859    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1860    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1861    auto *FnTy =
1862        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1863    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1864    break;
1865  }
1866  case OMPRTL__kmpc_threadprivate_cached: {
1867    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1868    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1869    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1870                                CGM.VoidPtrTy, CGM.SizeTy,
1871                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1872    auto *FnTy =
1873        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1874    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1875    break;
1876  }
1877  case OMPRTL__kmpc_critical: {
1878    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1879    // kmp_critical_name *crit);
1880    llvm::Type *TypeParams[] = {
1881        getIdentTyPointerTy(), CGM.Int32Ty,
1882        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1883    auto *FnTy =
1884        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1885    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1886    break;
1887  }
1888  case OMPRTL__kmpc_critical_with_hint: {
1889    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1890    // kmp_critical_name *crit, uintptr_t hint);
1891    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1892                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
1893                                CGM.IntPtrTy};
1894    auto *FnTy =
1895        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1896    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1897    break;
1898  }
1899  case OMPRTL__kmpc_threadprivate_register: {
1900    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1901    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1902    // typedef void *(*kmpc_ctor)(void *);
1903    auto *KmpcCtorTy =
1904        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1905                                /*isVarArg*/ false)->getPointerTo();
1906    // typedef void *(*kmpc_cctor)(void *, void *);
1907    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1908    auto *KmpcCopyCtorTy =
1909        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1910                                /*isVarArg*/ false)
1911            ->getPointerTo();
1912    // typedef void (*kmpc_dtor)(void *);
1913    auto *KmpcDtorTy =
1914        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1915            ->getPointerTo();
1916    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1917                              KmpcCopyCtorTy, KmpcDtorTy};
1918    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1919                                        /*isVarArg*/ false);
1920    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1921    break;
1922  }
1923  case OMPRTL__kmpc_end_critical: {
1924    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1925    // kmp_critical_name *crit);
1926    llvm::Type *TypeParams[] = {
1927        getIdentTyPointerTy(), CGM.Int32Ty,
1928        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1929    auto *FnTy =
1930        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1931    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1932    break;
1933  }
1934  case OMPRTL__kmpc_cancel_barrier: {
1935    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1936    // global_tid);
1937    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1938    auto *FnTy =
1939        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1940    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1941    break;
1942  }
1943  case OMPRTL__kmpc_barrier: {
1944    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1945    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946    auto *FnTy =
1947        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1948    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1949    break;
1950  }
1951  case OMPRTL__kmpc_for_static_fini: {
1952    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1953    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1954    auto *FnTy =
1955        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1957    break;
1958  }
1959  case OMPRTL__kmpc_push_num_threads: {
1960    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1961    // kmp_int32 num_threads)
1962    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1963                                CGM.Int32Ty};
1964    auto *FnTy =
1965        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1966    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1967    break;
1968  }
1969  case OMPRTL__kmpc_serialized_parallel: {
1970    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1971    // global_tid);
1972    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1973    auto *FnTy =
1974        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1975    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1976    break;
1977  }
1978  case OMPRTL__kmpc_end_serialized_parallel: {
1979    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1980    // global_tid);
1981    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1982    auto *FnTy =
1983        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1984    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1985    break;
1986  }
1987  case OMPRTL__kmpc_flush: {
1988    // Build void __kmpc_flush(ident_t *loc);
1989    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1990    auto *FnTy =
1991        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1992    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1993    break;
1994  }
1995  case OMPRTL__kmpc_master: {
1996    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1997    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1998    auto *FnTy =
1999        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2000    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2001    break;
2002  }
2003  case OMPRTL__kmpc_end_master: {
2004    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2005    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2006    auto *FnTy =
2007        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2008    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2009    break;
2010  }
2011  case OMPRTL__kmpc_omp_taskyield: {
2012    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2013    // int end_part);
2014    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2015    auto *FnTy =
2016        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2017    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2018    break;
2019  }
2020  case OMPRTL__kmpc_single: {
2021    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2022    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2023    auto *FnTy =
2024        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2025    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2026    break;
2027  }
2028  case OMPRTL__kmpc_end_single: {
2029    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2030    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2031    auto *FnTy =
2032        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2034    break;
2035  }
2036  case OMPRTL__kmpc_omp_task_alloc: {
2037    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2038    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039    // kmp_routine_entry_t *task_entry);
2040    assert(KmpRoutineEntryPtrTy != nullptr &&
2041           "Type kmp_routine_entry_t must be created.");
2042    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2044    // Return void * and then cast to particular kmp_task_t type.
2045    auto *FnTy =
2046        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2047    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2048    break;
2049  }
2050  case OMPRTL__kmpc_omp_target_task_alloc: {
2051    // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2052    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2053    // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2054    assert(KmpRoutineEntryPtrTy != nullptr &&
2055           "Type kmp_routine_entry_t must be created.");
2056    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2058                                CGM.Int64Ty};
2059    // Return void * and then cast to particular kmp_task_t type.
2060    auto *FnTy =
2061        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2062    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2063    break;
2064  }
2065  case OMPRTL__kmpc_omp_task: {
2066    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2067    // *new_task);
2068    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2069                                CGM.VoidPtrTy};
2070    auto *FnTy =
2071        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2072    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2073    break;
2074  }
2075  case OMPRTL__kmpc_copyprivate: {
2076    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2077    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2078    // kmp_int32 didit);
2079    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2080    auto *CpyFnTy =
2081        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2082    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2083                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2084                                CGM.Int32Ty};
2085    auto *FnTy =
2086        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2087    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2088    break;
2089  }
2090  case OMPRTL__kmpc_reduce: {
2091    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2092    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2093    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2094    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2095    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2096                                               /*isVarArg=*/false);
2097    llvm::Type *TypeParams[] = {
2098        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2099        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2100        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2101    auto *FnTy =
2102        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2103    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2104    break;
2105  }
2106  case OMPRTL__kmpc_reduce_nowait: {
2107    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2108    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2109    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2110    // *lck);
2111    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2112    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2113                                               /*isVarArg=*/false);
2114    llvm::Type *TypeParams[] = {
2115        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2116        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2117        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2118    auto *FnTy =
2119        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2120    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2121    break;
2122  }
2123  case OMPRTL__kmpc_end_reduce: {
2124    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2125    // kmp_critical_name *lck);
2126    llvm::Type *TypeParams[] = {
2127        getIdentTyPointerTy(), CGM.Int32Ty,
2128        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2129    auto *FnTy =
2130        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2131    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2132    break;
2133  }
2134  case OMPRTL__kmpc_end_reduce_nowait: {
2135    // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2136    // kmp_critical_name *lck);
2137    llvm::Type *TypeParams[] = {
2138        getIdentTyPointerTy(), CGM.Int32Ty,
2139        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2140    auto *FnTy =
2141        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2142    RTLFn =
2143        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2144    break;
2145  }
2146  case OMPRTL__kmpc_omp_task_begin_if0: {
2147    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2148    // *new_task);
2149    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150                                CGM.VoidPtrTy};
2151    auto *FnTy =
2152        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2153    RTLFn =
2154        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2155    break;
2156  }
2157  case OMPRTL__kmpc_omp_task_complete_if0: {
2158    // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2159    // *new_task);
2160    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2161                                CGM.VoidPtrTy};
2162    auto *FnTy =
2163        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2164    RTLFn = CGM.CreateRuntimeFunction(FnTy,
2165                                      /*Name=*/"__kmpc_omp_task_complete_if0");
2166    break;
2167  }
2168  case OMPRTL__kmpc_ordered: {
2169    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2170    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2171    auto *FnTy =
2172        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2173    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2174    break;
2175  }
2176  case OMPRTL__kmpc_end_ordered: {
2177    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2178    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2179    auto *FnTy =
2180        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2181    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2182    break;
2183  }
2184  case OMPRTL__kmpc_omp_taskwait: {
2185    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2186    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2187    auto *FnTy =
2188        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2189    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2190    break;
2191  }
2192  case OMPRTL__kmpc_taskgroup: {
2193    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2194    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2195    auto *FnTy =
2196        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2197    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2198    break;
2199  }
2200  case OMPRTL__kmpc_end_taskgroup: {
2201    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2202    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2203    auto *FnTy =
2204        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2205    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2206    break;
2207  }
2208  case OMPRTL__kmpc_push_proc_bind: {
2209    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2210    // int proc_bind)
2211    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2212    auto *FnTy =
2213        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2214    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2215    break;
2216  }
2217  case OMPRTL__kmpc_omp_task_with_deps: {
2218    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2219    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2220    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2221    llvm::Type *TypeParams[] = {
2222        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2223        CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2224    auto *FnTy =
2225        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2226    RTLFn =
2227        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2228    break;
2229  }
2230  case OMPRTL__kmpc_omp_wait_deps: {
2231    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2232    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2233    // kmp_depend_info_t *noalias_dep_list);
2234    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2235                                CGM.Int32Ty,           CGM.VoidPtrTy,
2236                                CGM.Int32Ty,           CGM.VoidPtrTy};
2237    auto *FnTy =
2238        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2239    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2240    break;
2241  }
2242  case OMPRTL__kmpc_cancellationpoint: {
2243    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2244    // global_tid, kmp_int32 cncl_kind)
2245    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2246    auto *FnTy =
2247        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2248    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2249    break;
2250  }
2251  case OMPRTL__kmpc_cancel: {
2252    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2253    // kmp_int32 cncl_kind)
2254    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2255    auto *FnTy =
2256        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2257    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2258    break;
2259  }
2260  case OMPRTL__kmpc_push_num_teams: {
2261    // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2262    // kmp_int32 num_teams, kmp_int32 num_threads)
2263    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2264        CGM.Int32Ty};
2265    auto *FnTy =
2266        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2267    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2268    break;
2269  }
2270  case OMPRTL__kmpc_fork_teams: {
2271    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2272    // microtask, ...);
2273    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2274                                getKmpc_MicroPointerTy()};
2275    auto *FnTy =
2276        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2277    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2278    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2279      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2280        llvm::LLVMContext &Ctx = F->getContext();
2281        llvm::MDBuilder MDB(Ctx);
2282        // Annotate the callback behavior of the __kmpc_fork_teams:
2283        //  - The callback callee is argument number 2 (microtask).
2284        //  - The first two arguments of the callback callee are unknown (-1).
2285        //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2286        //    callback callee.
2287        F->addMetadata(
2288            llvm::LLVMContext::MD_callback,
2289            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2290                                        2, {-1, -1},
2291                                        /* VarArgsArePassed */ true)}));
2292      }
2293    }
2294    break;
2295  }
2296  case OMPRTL__kmpc_taskloop: {
2297    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2298    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2299    // sched, kmp_uint64 grainsize, void *task_dup);
2300    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2301                                CGM.IntTy,
2302                                CGM.VoidPtrTy,
2303                                CGM.IntTy,
2304                                CGM.Int64Ty->getPointerTo(),
2305                                CGM.Int64Ty->getPointerTo(),
2306                                CGM.Int64Ty,
2307                                CGM.IntTy,
2308                                CGM.IntTy,
2309                                CGM.Int64Ty,
2310                                CGM.VoidPtrTy};
2311    auto *FnTy =
2312        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2313    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2314    break;
2315  }
2316  case OMPRTL__kmpc_doacross_init: {
2317    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2318    // num_dims, struct kmp_dim *dims);
2319    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2320                                CGM.Int32Ty,
2321                                CGM.Int32Ty,
2322                                CGM.VoidPtrTy};
2323    auto *FnTy =
2324        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2326    break;
2327  }
2328  case OMPRTL__kmpc_doacross_fini: {
2329    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2330    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2331    auto *FnTy =
2332        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2333    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2334    break;
2335  }
2336  case OMPRTL__kmpc_doacross_post: {
2337    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2338    // *vec);
2339    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2340                                CGM.Int64Ty->getPointerTo()};
2341    auto *FnTy =
2342        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2344    break;
2345  }
2346  case OMPRTL__kmpc_doacross_wait: {
2347    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2348    // *vec);
2349    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2350                                CGM.Int64Ty->getPointerTo()};
2351    auto *FnTy =
2352        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2353    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2354    break;
2355  }
2356  case OMPRTL__kmpc_task_reduction_init: {
2357    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2358    // *data);
2359    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2360    auto *FnTy =
2361        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2362    RTLFn =
2363        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2364    break;
2365  }
2366  case OMPRTL__kmpc_task_reduction_get_th_data: {
2367    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2368    // *d);
2369    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2370    auto *FnTy =
2371        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2372    RTLFn = CGM.CreateRuntimeFunction(
2373        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2374    break;
2375  }
2376  case OMPRTL__kmpc_alloc: {
2377    // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2378    // al); omp_allocator_handle_t type is void *.
2379    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2380    auto *FnTy =
2381        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2382    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2383    break;
2384  }
2385  case OMPRTL__kmpc_free: {
2386    // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2387    // al); omp_allocator_handle_t type is void *.
2388    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2389    auto *FnTy =
2390        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2391    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2392    break;
2393  }
2394  case OMPRTL__kmpc_push_target_tripcount: {
2395    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2396    // size);
2397    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2398    llvm::FunctionType *FnTy =
2399        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2400    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2401    break;
2402  }
2403  case OMPRTL__tgt_target: {
2404    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2405    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2406    // *arg_types);
2407    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408                                CGM.VoidPtrTy,
2409                                CGM.Int32Ty,
2410                                CGM.VoidPtrPtrTy,
2411                                CGM.VoidPtrPtrTy,
2412                                CGM.Int64Ty->getPointerTo(),
2413                                CGM.Int64Ty->getPointerTo()};
2414    auto *FnTy =
2415        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2416    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2417    break;
2418  }
2419  case OMPRTL__tgt_target_nowait: {
2420    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2421    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2422    // int64_t *arg_types);
2423    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2424                                CGM.VoidPtrTy,
2425                                CGM.Int32Ty,
2426                                CGM.VoidPtrPtrTy,
2427                                CGM.VoidPtrPtrTy,
2428                                CGM.Int64Ty->getPointerTo(),
2429                                CGM.Int64Ty->getPointerTo()};
2430    auto *FnTy =
2431        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2433    break;
2434  }
2435  case OMPRTL__tgt_target_teams: {
2436    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2437    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2438    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2439    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2440                                CGM.VoidPtrTy,
2441                                CGM.Int32Ty,
2442                                CGM.VoidPtrPtrTy,
2443                                CGM.VoidPtrPtrTy,
2444                                CGM.Int64Ty->getPointerTo(),
2445                                CGM.Int64Ty->getPointerTo(),
2446                                CGM.Int32Ty,
2447                                CGM.Int32Ty};
2448    auto *FnTy =
2449        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2450    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2451    break;
2452  }
2453  case OMPRTL__tgt_target_teams_nowait: {
2454    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2455    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2456    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2457    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2458                                CGM.VoidPtrTy,
2459                                CGM.Int32Ty,
2460                                CGM.VoidPtrPtrTy,
2461                                CGM.VoidPtrPtrTy,
2462                                CGM.Int64Ty->getPointerTo(),
2463                                CGM.Int64Ty->getPointerTo(),
2464                                CGM.Int32Ty,
2465                                CGM.Int32Ty};
2466    auto *FnTy =
2467        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2468    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2469    break;
2470  }
2471  case OMPRTL__tgt_register_requires: {
2472    // Build void __tgt_register_requires(int64_t flags);
2473    llvm::Type *TypeParams[] = {CGM.Int64Ty};
2474    auto *FnTy =
2475        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2476    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2477    break;
2478  }
2479  case OMPRTL__tgt_target_data_begin: {
2480    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2481    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2482    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2483                                CGM.Int32Ty,
2484                                CGM.VoidPtrPtrTy,
2485                                CGM.VoidPtrPtrTy,
2486                                CGM.Int64Ty->getPointerTo(),
2487                                CGM.Int64Ty->getPointerTo()};
2488    auto *FnTy =
2489        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2491    break;
2492  }
2493  case OMPRTL__tgt_target_data_begin_nowait: {
2494    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2495    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2496    // *arg_types);
2497    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2498                                CGM.Int32Ty,
2499                                CGM.VoidPtrPtrTy,
2500                                CGM.VoidPtrPtrTy,
2501                                CGM.Int64Ty->getPointerTo(),
2502                                CGM.Int64Ty->getPointerTo()};
2503    auto *FnTy =
2504        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2505    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2506    break;
2507  }
2508  case OMPRTL__tgt_target_data_end: {
2509    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2510    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2511    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2512                                CGM.Int32Ty,
2513                                CGM.VoidPtrPtrTy,
2514                                CGM.VoidPtrPtrTy,
2515                                CGM.Int64Ty->getPointerTo(),
2516                                CGM.Int64Ty->getPointerTo()};
2517    auto *FnTy =
2518        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2520    break;
2521  }
2522  case OMPRTL__tgt_target_data_end_nowait: {
2523    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2524    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2525    // *arg_types);
2526    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2527                                CGM.Int32Ty,
2528                                CGM.VoidPtrPtrTy,
2529                                CGM.VoidPtrPtrTy,
2530                                CGM.Int64Ty->getPointerTo(),
2531                                CGM.Int64Ty->getPointerTo()};
2532    auto *FnTy =
2533        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2534    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2535    break;
2536  }
2537  case OMPRTL__tgt_target_data_update: {
2538    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2539    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2540    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2541                                CGM.Int32Ty,
2542                                CGM.VoidPtrPtrTy,
2543                                CGM.VoidPtrPtrTy,
2544                                CGM.Int64Ty->getPointerTo(),
2545                                CGM.Int64Ty->getPointerTo()};
2546    auto *FnTy =
2547        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2548    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2549    break;
2550  }
2551  case OMPRTL__tgt_target_data_update_nowait: {
2552    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2553    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2554    // *arg_types);
2555    llvm::Type *TypeParams[] = {CGM.Int64Ty,
2556                                CGM.Int32Ty,
2557                                CGM.VoidPtrPtrTy,
2558                                CGM.VoidPtrPtrTy,
2559                                CGM.Int64Ty->getPointerTo(),
2560                                CGM.Int64Ty->getPointerTo()};
2561    auto *FnTy =
2562        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2563    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2564    break;
2565  }
2566  case OMPRTL__tgt_mapper_num_components: {
2567    // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2568    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2569    auto *FnTy =
2570        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2571    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2572    break;
2573  }
2574  case OMPRTL__tgt_push_mapper_component: {
2575    // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2576    // *base, void *begin, int64_t size, int64_t type);
2577    llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2578                                CGM.Int64Ty, CGM.Int64Ty};
2579    auto *FnTy =
2580        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2581    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2582    break;
2583  }
2584  }
2585  assert(RTLFn && "Unable to find OpenMP runtime function");
2586  return RTLFn;
2587}
2588
2589llvm::FunctionCallee
2590CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2591  assert((IVSize == 32 || IVSize == 64) &&
2592         "IV size is not compatible with the omp runtime");
2593  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2594                                            : "__kmpc_for_static_init_4u")
2595                                : (IVSigned ? "__kmpc_for_static_init_8"
2596                                            : "__kmpc_for_static_init_8u");
2597  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2598  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2599  llvm::Type *TypeParams[] = {
2600    getIdentTyPointerTy(),                     // loc
2601    CGM.Int32Ty,                               // tid
2602    CGM.Int32Ty,                               // schedtype
2603    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2604    PtrTy,                                     // p_lower
2605    PtrTy,                                     // p_upper
2606    PtrTy,                                     // p_stride
2607    ITy,                                       // incr
2608    ITy                                        // chunk
2609  };
2610  auto *FnTy =
2611      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2612  return CGM.CreateRuntimeFunction(FnTy, Name);
2613}
2614
2615llvm::FunctionCallee
2616CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2617  assert((IVSize == 32 || IVSize == 64) &&
2618         "IV size is not compatible with the omp runtime");
2619  StringRef Name =
2620      IVSize == 32
2621          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2622          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2623  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2624  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2625                               CGM.Int32Ty,           // tid
2626                               CGM.Int32Ty,           // schedtype
2627                               ITy,                   // lower
2628                               ITy,                   // upper
2629                               ITy,                   // stride
2630                               ITy                    // chunk
2631  };
2632  auto *FnTy =
2633      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2634  return CGM.CreateRuntimeFunction(FnTy, Name);
2635}
2636
2637llvm::FunctionCallee
2638CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2639  assert((IVSize == 32 || IVSize == 64) &&
2640         "IV size is not compatible with the omp runtime");
2641  StringRef Name =
2642      IVSize == 32
2643          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2644          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2645  llvm::Type *TypeParams[] = {
2646      getIdentTyPointerTy(), // loc
2647      CGM.Int32Ty,           // tid
2648  };
2649  auto *FnTy =
2650      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2651  return CGM.CreateRuntimeFunction(FnTy, Name);
2652}
2653
2654llvm::FunctionCallee
2655CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2656  assert((IVSize == 32 || IVSize == 64) &&
2657         "IV size is not compatible with the omp runtime");
2658  StringRef Name =
2659      IVSize == 32
2660          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2661          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2662  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2663  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2664  llvm::Type *TypeParams[] = {
2665    getIdentTyPointerTy(),                     // loc
2666    CGM.Int32Ty,                               // tid
2667    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2668    PtrTy,                                     // p_lower
2669    PtrTy,                                     // p_upper
2670    PtrTy                                      // p_stride
2671  };
2672  auto *FnTy =
2673      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2674  return CGM.CreateRuntimeFunction(FnTy, Name);
2675}
2676
2677/// Obtain information that uniquely identifies a target entry. This
2678/// consists of the file and device IDs as well as line number associated with
2679/// the relevant entry source location.
2680static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2681                                     unsigned &DeviceID, unsigned &FileID,
2682                                     unsigned &LineNum) {
2683  SourceManager &SM = C.getSourceManager();
2684
2685  // The loc should be always valid and have a file ID (the user cannot use
2686  // #pragma directives in macros)
2687
2688  assert(Loc.isValid() && "Source location is expected to be always valid.");
2689
2690  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2691  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2692
2693  llvm::sys::fs::UniqueID ID;
2694  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2695    SM.getDiagnostics().Report(diag::err_cannot_open_file)
2696        << PLoc.getFilename() << EC.message();
2697
2698  DeviceID = ID.getDevice();
2699  FileID = ID.getFile();
2700  LineNum = PLoc.getLine();
2701}
2702
// Returns the address of the "_decl_tgt_ref_ptr" reference pointer for a
// declare-target variable, creating it on first use. Only 'link' variables,
// and 'to' variables under unified shared memory, get such a pointer;
// otherwise (and in simd-only mode) an invalid Address is returned.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // -fopenmp-simd: no device code, nothing to indirect through.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: "<mangled-name>[_<fileid>]_decl_tgt_ref_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables: mix in the file ID so same-named
        // variables from different translation units do not collide.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First request: create the pointer variable and register it with the
      // offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the original variable's
      // address; on the device no initializer is emitted here.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2741
2742llvm::Constant *
2743CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2744  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2745         !CGM.getContext().getTargetInfo().isTLSSupported());
2746  // Lookup the entry, lazily creating it if necessary.
2747  std::string Suffix = getName({"cache", ""});
2748  return getOrCreateInternalVariable(
2749      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2750}
2751
2752Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2753                                                const VarDecl *VD,
2754                                                Address VDAddr,
2755                                                SourceLocation Loc) {
2756  if (CGM.getLangOpts().OpenMPUseTLS &&
2757      CGM.getContext().getTargetInfo().isTLSSupported())
2758    return VDAddr;
2759
2760  llvm::Type *VarTy = VDAddr.getElementType();
2761  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2762                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2763                                                       CGM.Int8PtrTy),
2764                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2765                         getOrCreateThreadPrivateCache(VD)};
2766  return Address(CGF.EmitRuntimeCall(
2767      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2768                 VDAddr.getAlignment());
2769}
2770
2771void CGOpenMPRuntime::emitThreadPrivateVarInit(
2772    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2773    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2774  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2775  // library.
2776  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2777  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2778                      OMPLoc);
2779  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2780  // to register constructor/destructor for variable.
2781  llvm::Value *Args[] = {
2782      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2783      Ctor, CopyCtor, Dtor};
2784  CGF.EmitRuntimeCall(
2785      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2786}
2787
// Emits (at most once per variable definition) the runtime registration of a
// threadprivate variable's constructor/copy-ctor/destructor via
// emitThreadPrivateVarInit. When no CodeGenFunction is supplied, the
// registration is wrapped in a fresh "__omp_threadprivate_init_" function
// which is returned to the caller; otherwise nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based lowering no runtime registration is needed.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // ThreadPrivateWithDefinition guards against emitting the registration
  // twice for the same mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes a single void* - the address of the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Reinterpret the incoming void* as a pointer to the variable's type
      // and emit the initializer into it.
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also takes the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // Pass a typed null pointer when no constructor is needed.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // Likewise for the destructor slot.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No current function: emit the registration into a dedicated helper
      // function and hand that function back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
2907
// Emits the offload-entry ctor/dtor bookkeeping for a declare-target
// variable: on the device, real "<prefix>_ctor"/"<prefix>_dtor" functions;
// on the host, placeholder globals that serve as entry IDs. Returns
// OpenMPIsDevice, i.e. whether the caller should skip the host-side
// initialization it would otherwise emit.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no device compilation is involved at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed
  // through a reference pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Only emit the entries once per variable definition.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private placeholder global acts as both the entry's
      // address and its unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; nothing in the module references it directly.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global as entry address/ID (see ctor above).
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
3020
// Returns the address of an "artificial" threadprivate variable - a runtime
// helper global identified by \p Name rather than by a user declaration.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // The backing global is named "<Name><artificial-suffix>".
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // If TLS is usable, just mark the global thread_local and return it.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise obtain the per-thread copy via
  //   __kmpc_threadprivate_cached(&loc, gtid, &var, size, &cache)
  // using a dedicated "<Name><artificial><cache>" cache global.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns an untyped pointer; cast it back to the variable's
  // memory type before wrapping it in an Address.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
3049
3050void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3051                                   const RegionCodeGenTy &ThenGen,
3052                                   const RegionCodeGenTy &ElseGen) {
3053  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3054
3055  // If the condition constant folds and can be elided, try to avoid emitting
3056  // the condition and the dead arm of the if/else.
3057  bool CondConstant;
3058  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3059    if (CondConstant)
3060      ThenGen(CGF);
3061    else
3062      ElseGen(CGF);
3063    return;
3064  }
3065
3066  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
3067  // emit the conditional branch.
3068  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3069  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3070  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3071  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3072
3073  // Emit the 'then' code.
3074  CGF.EmitBlock(ThenBlock);
3075  ThenGen(CGF);
3076  CGF.EmitBranch(ContBlock);
3077  // Emit the 'else' code if present.
3078  // There is no need to emit line number for unconditional branch.
3079  (void)ApplyDebugLocation::CreateEmpty(CGF);
3080  CGF.EmitBlock(ElseBlock);
3081  ElseGen(CGF);
3082  // There is no need to emit line number for unconditional branch.
3083  (void)ApplyDebugLocation::CreateEmpty(CGF);
3084  CGF.EmitBranch(ContBlock);
3085  // Emit the continuation block for code after the if.
3086  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3087}
3088
// Emits a 'parallel' region invocation: normally a __kmpc_fork_call of the
// outlined function; when an 'if' clause is present, a guarded alternative
// that runs the region serialized on the current thread.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables are appended as trailing variadic arguments.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized fallback taken when the if-clause evaluates to false.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: the parallel path is taken unconditionally.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3148
3149// If we're inside an (outlined) parallel region, use the region info's
3150// thread-ID variable (it is passed in a first argument of the outlined function
3151// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3152// regular serial code region, get thread ID by calling kmp_int32
3153// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3154// return the address of that temp.
3155Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3156                                             SourceLocation Loc) {
3157  if (auto *OMPRegionInfo =
3158          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3159    if (OMPRegionInfo->getThreadIDVariable())
3160      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3161
3162  llvm::Value *ThreadID = getThreadID(CGF, Loc);
3163  QualType Int32Ty =
3164      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3165  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3166  CGF.EmitStoreOfScalar(ThreadID,
3167                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3168
3169  return ThreadIDTemp;
3170}
3171
3172llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3173    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3174  SmallString<256> Buffer;
3175  llvm::raw_svector_ostream Out(Buffer);
3176  Out << Name;
3177  StringRef RuntimeName = Out.str();
3178  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3179  if (Elem.second) {
3180    assert(Elem.second->getType()->getPointerElementType() == Ty &&
3181           "OMP internal variable has different type than requested");
3182    return &*Elem.second;
3183  }
3184
3185  return Elem.second = new llvm::GlobalVariable(
3186             CGM.getModule(), Ty, /*IsConstant*/ false,
3187             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3188             Elem.first(), /*InsertBefore=*/nullptr,
3189             llvm::GlobalValue::NotThreadLocal, AddressSpace);
3190}
3191
3192llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3193  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3194  std::string Name = getName({Prefix, "var"});
3195  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3196}
3197
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a region with an enter/exit pair of runtime calls; in conditional
/// mode the region body is guarded by the enter call's return value.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function invoked before the region body.
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function invoked after the region body.
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// When true, the body only runs if the enter call returns non-zero
  /// (e.g. __kmpc_master / __kmpc_single).
  bool Conditional;
  /// Continuation block for the conditional form; created in Enter() and
  /// consumed in Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call; in conditional mode also open the guarded region.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the guarded region opened by Enter(). Callers invoke this only
  /// for actions constructed with Conditional=true (ContBlock is null
  /// otherwise).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call after the region body.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
3236
3237void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3238                                         StringRef CriticalName,
3239                                         const RegionCodeGenTy &CriticalOpGen,
3240                                         SourceLocation Loc, const Expr *Hint) {
3241  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3242  // CriticalOpGen();
3243  // __kmpc_end_critical(ident_t *, gtid, Lock);
3244  // Prepare arguments and build a call to __kmpc_critical
3245  if (!CGF.HaveInsertPoint())
3246    return;
3247  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3248                         getCriticalRegionLock(CriticalName)};
3249  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3250                                                std::end(Args));
3251  if (Hint) {
3252    EnterArgs.push_back(CGF.Builder.CreateIntCast(
3253        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3254  }
3255  CommonActionTy Action(
3256      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3257                                 : OMPRTL__kmpc_critical),
3258      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3259  CriticalOpGen.setAction(Action);
3260  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3261}
3262
3263void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3264                                       const RegionCodeGenTy &MasterOpGen,
3265                                       SourceLocation Loc) {
3266  if (!CGF.HaveInsertPoint())
3267    return;
3268  // if(__kmpc_master(ident_t *, gtid)) {
3269  //   MasterOpGen();
3270  //   __kmpc_end_master(ident_t *, gtid);
3271  // }
3272  // Prepare arguments and build a call to __kmpc_master
3273  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3274  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3275                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3276                        /*Conditional=*/true);
3277  MasterOpGen.setAction(Action);
3278  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3279  Action.Done(CGF);
3280}
3281
3282void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3283                                        SourceLocation Loc) {
3284  if (!CGF.HaveInsertPoint())
3285    return;
3286  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3287  llvm::Value *Args[] = {
3288      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3289      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3290  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3291  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3292    Region->emitUntiedSwitch(CGF);
3293}
3294
3295void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3296                                          const RegionCodeGenTy &TaskgroupOpGen,
3297                                          SourceLocation Loc) {
3298  if (!CGF.HaveInsertPoint())
3299    return;
3300  // __kmpc_taskgroup(ident_t *, gtid);
3301  // TaskgroupOpGen();
3302  // __kmpc_end_taskgroup(ident_t *, gtid);
3303  // Prepare arguments and build a call to __kmpc_taskgroup
3304  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3305  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3306                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3307                        Args);
3308  TaskgroupOpGen.setAction(Action);
3309  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3310}
3311
3312/// Given an array of pointers to variables, project the address of a
3313/// given variable.
3314static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3315                                      unsigned Index, const VarDecl *Var) {
3316  // Pull out the pointer to the variable.
3317  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3318  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3319
3320  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3321  Addr = CGF.Builder.CreateElementBitCast(
3322      Addr, CGF.ConvertTypeForMem(Var->getType()));
3323  return Addr;
3324}
3325
// Emits the helper function passed to __kmpc_copyprivate:
//   void copy_func(void *LHSArg, void *RHSArg);
// Both arguments point to arrays of void* addressing the copyprivate
// variables; element I of the destination array is assigned from element I
// of the source array using AssignmentOps[I].
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // Internal linkage: the helper is only referenced from this module.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Copy element I using the assignment expression supplied for it.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3379
/// Emit an OpenMP 'single' region: the body runs on exactly one thread, and
/// any copyprivate variables are then broadcast from that thread to all other
/// threads of the team via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four arrays are parallel: element I of each describes the same
  // copyprivate variable (variable, source, destination, copy expression).
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // did_it records whether this thread executed the single region; it is only
  // needed when there are copyprivate variables to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the body and __kmpc_end_single only run when
  // __kmpc_single returns non-zero (i.e. on the chosen thread).
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    // Fill the list with the address of each copyprivate variable, cast to
    // void* so the runtime can hand them to the copy helper.
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This Args intentionally shadows the __kmpc_single argument list above.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3461
3462void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3463                                        const RegionCodeGenTy &OrderedOpGen,
3464                                        SourceLocation Loc, bool IsThreads) {
3465  if (!CGF.HaveInsertPoint())
3466    return;
3467  // __kmpc_ordered(ident_t *, gtid);
3468  // OrderedOpGen();
3469  // __kmpc_end_ordered(ident_t *, gtid);
3470  // Prepare arguments and build a call to __kmpc_ordered
3471  if (IsThreads) {
3472    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3473    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3474                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3475                          Args);
3476    OrderedOpGen.setAction(Action);
3477    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3478    return;
3479  }
3480  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3481}
3482
3483unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3484  unsigned Flags;
3485  if (Kind == OMPD_for)
3486    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3487  else if (Kind == OMPD_sections)
3488    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3489  else if (Kind == OMPD_single)
3490    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3491  else if (Kind == OMPD_barrier)
3492    Flags = OMP_IDENT_BARRIER_EXPL;
3493  else
3494    Flags = OMP_IDENT_BARRIER_IMPL;
3495  return Flags;
3496}
3497
3498void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3499    CodeGenFunction &CGF, const OMPLoopDirective &S,
3500    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3501  // Check if the loop directive is actually a doacross loop directive. In this
3502  // case choose static, 1 schedule.
3503  if (llvm::any_of(
3504          S.getClausesOfKind<OMPOrderedClause>(),
3505          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3506    ScheduleKind = OMPC_SCHEDULE_static;
3507    // Chunk size is 1 in this case.
3508    llvm::APInt ChunkSize(32, 1);
3509    ChunkExpr = IntegerLiteral::Create(
3510        CGF.getContext(), ChunkSize,
3511        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3512        SourceLocation());
3513  }
3514}
3515
/// Emit a barrier for the given directive kind.
///
/// If the OpenMPIRBuilder is enabled it handles the whole barrier; otherwise
/// either __kmpc_cancel_barrier (inside a cancellable region, optionally with
/// a cancellation-check branch) or a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // Delegate entirely to the IR builder and adopt its insert point.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region the cancel-aware barrier must be used so a
    // pending cancellation can be observed at the barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        // Branch through cleanups to the construct's cancellation target.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3562
3563/// Map the OpenMP loop schedule to the runtime enumeration.
3564static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3565                                          bool Chunked, bool Ordered) {
3566  switch (ScheduleKind) {
3567  case OMPC_SCHEDULE_static:
3568    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3569                   : (Ordered ? OMP_ord_static : OMP_sch_static);
3570  case OMPC_SCHEDULE_dynamic:
3571    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3572  case OMPC_SCHEDULE_guided:
3573    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3574  case OMPC_SCHEDULE_runtime:
3575    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3576  case OMPC_SCHEDULE_auto:
3577    return Ordered ? OMP_ord_auto : OMP_sch_auto;
3578  case OMPC_SCHEDULE_unknown:
3579    assert(!Chunked && "chunk was specified but schedule kind not known");
3580    return Ordered ? OMP_ord_static : OMP_sch_static;
3581  }
3582  llvm_unreachable("Unexpected runtime schedule");
3583}
3584
3585/// Map the OpenMP distribute schedule to the runtime enumeration.
3586static OpenMPSchedType
3587getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3588  // only static is allowed for dist_schedule
3589  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3590}
3591
3592bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3593                                         bool Chunked) const {
3594  OpenMPSchedType Schedule =
3595      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3596  return Schedule == OMP_sch_static;
3597}
3598
3599bool CGOpenMPRuntime::isStaticNonchunked(
3600    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3601  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3602  return Schedule == OMP_dist_sch_static;
3603}
3604
3605bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3606                                      bool Chunked) const {
3607  OpenMPSchedType Schedule =
3608      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3609  return Schedule == OMP_sch_static_chunked;
3610}
3611
3612bool CGOpenMPRuntime::isStaticChunked(
3613    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3614  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3615  return Schedule == OMP_dist_sch_static_chunked;
3616}
3617
3618bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3619  OpenMPSchedType Schedule =
3620      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3621  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3622  return Schedule != OMP_sch_static;
3623}
3624
3625static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3626                                  OpenMPScheduleClauseModifier M1,
3627                                  OpenMPScheduleClauseModifier M2) {
3628  int Modifier = 0;
3629  switch (M1) {
3630  case OMPC_SCHEDULE_MODIFIER_monotonic:
3631    Modifier = OMP_sch_modifier_monotonic;
3632    break;
3633  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3634    Modifier = OMP_sch_modifier_nonmonotonic;
3635    break;
3636  case OMPC_SCHEDULE_MODIFIER_simd:
3637    if (Schedule == OMP_sch_static_chunked)
3638      Schedule = OMP_sch_static_balanced_chunked;
3639    break;
3640  case OMPC_SCHEDULE_MODIFIER_last:
3641  case OMPC_SCHEDULE_MODIFIER_unknown:
3642    break;
3643  }
3644  switch (M2) {
3645  case OMPC_SCHEDULE_MODIFIER_monotonic:
3646    Modifier = OMP_sch_modifier_monotonic;
3647    break;
3648  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3649    Modifier = OMP_sch_modifier_nonmonotonic;
3650    break;
3651  case OMPC_SCHEDULE_MODIFIER_simd:
3652    if (Schedule == OMP_sch_static_chunked)
3653      Schedule = OMP_sch_static_balanced_chunked;
3654    break;
3655  case OMPC_SCHEDULE_MODIFIER_last:
3656  case OMPC_SCHEDULE_MODIFIER_unknown:
3657    break;
3658  }
3659  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3660  // If the static schedule kind is specified or if the ordered clause is
3661  // specified, and if the nonmonotonic modifier is not specified, the effect is
3662  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3663  // modifier is specified, the effect is as if the nonmonotonic modifier is
3664  // specified.
3665  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3666    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3667          Schedule == OMP_sch_static_balanced_chunked ||
3668          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3669          Schedule == OMP_dist_sch_static_chunked ||
3670          Schedule == OMP_dist_sch_static))
3671      Modifier = OMP_sch_modifier_nonmonotonic;
3672  }
3673  return Schedule | Modifier;
3674}
3675
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop.
///
/// \param IVSize Bit width of the iteration variable (32 or 64).
/// \param IVSigned Whether the iteration variable is signed (selects the
///        matching runtime entry point).
/// \param Ordered True if the loop has an 'ordered' clause.
/// \param DispatchValues Loop bounds and optional chunk expression.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules must go through the static-init path unless the loop is
  // ordered; only dynamic-style schedules reach dispatch_init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3708
/// Emit the actual __kmpc_for_static_init_* call shared by the loop and
/// distribute static-init entry points.
///
/// \param Schedule Must be one of the static schedule types (asserted below);
///        dynamic schedules use the dispatch-init path instead.
/// \param M1, M2 Schedule modifiers folded into the schedule argument.
/// \param Values Bounds/stride/last-iteration addresses plus IV size/sign and
///        the optional chunk value.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked static schedules.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3757
/// Emit static initialization for a worksharing loop or sections directive:
/// maps the clause schedule to the runtime schedule and delegates to
/// emitForStaticInitCall with the appropriate ident_t work flag.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with WORK_LOOP vs WORK_SECTIONS for the runtime.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Select __kmpc_for_static_init_{4,4u,8,8u} from the IV size/signedness.
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
3777
/// Emit static initialization for a 'distribute' directive. Distribute only
/// supports static scheduling, so no schedule modifiers apply.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  // Tag the location as distribute work for the runtime.
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Distribute has no schedule modifiers; pass 'unknown' for both.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
3793
3794void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3795                                          SourceLocation Loc,
3796                                          OpenMPDirectiveKind DKind) {
3797  if (!CGF.HaveInsertPoint())
3798    return;
3799  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3800  llvm::Value *Args[] = {
3801      emitUpdateLocation(CGF, Loc,
3802                         isOpenMPDistributeDirective(DKind)
3803                             ? OMP_IDENT_WORK_DISTRIBUTE
3804                             : isOpenMPLoopDirective(DKind)
3805                                   ? OMP_IDENT_WORK_LOOP
3806                                   : OMP_IDENT_WORK_SECTIONS),
3807      getThreadID(CGF, Loc)};
3808  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3809                      Args);
3810}
3811
3812void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3813                                                 SourceLocation Loc,
3814                                                 unsigned IVSize,
3815                                                 bool IVSigned) {
3816  if (!CGF.HaveInsertPoint())
3817    return;
3818  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3819  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3820  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3821}
3822
/// Emit __kmpc_dispatch_next_* to fetch the next chunk of a dynamically
/// scheduled loop.
///
/// \param IL Address receiving the is-last-iteration flag.
/// \param LB, UB, ST Addresses receiving the chunk's lower bound, upper bound
///        and stride.
/// \returns An i1 value that is true while there are more chunks to execute.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns kmp_int32; convert it to a boolean for the loop
  // condition.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
3846
3847void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3848                                           llvm::Value *NumThreads,
3849                                           SourceLocation Loc) {
3850  if (!CGF.HaveInsertPoint())
3851    return;
3852  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3853  llvm::Value *Args[] = {
3854      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3855      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3856  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3857                      Args);
3858}
3859
3860void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3861                                         ProcBindKind ProcBind,
3862                                         SourceLocation Loc) {
3863  if (!CGF.HaveInsertPoint())
3864    return;
3865  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3866  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3867  llvm::Value *Args[] = {
3868      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3869      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3870  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3871}
3872
3873void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3874                                SourceLocation Loc) {
3875  if (!CGF.HaveInsertPoint())
3876    return;
3877  // Build call void __kmpc_flush(ident_t *loc)
3878  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3879                      emitUpdateLocation(CGF, Loc));
3880}
3881
namespace {
/// Indexes of fields for type kmp_task_t.
/// These enumerators are used as GEP indexes, so their order must match the
/// field order of the kmp_task_t record built for task codegen — do not
/// reorder them.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3907
3908bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3909  return OffloadEntriesTargetRegion.empty() &&
3910         OffloadEntriesDeviceGlobalVar.empty();
3911}
3912
3913/// Initialize target region entry.
3914void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3915    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3916                                    StringRef ParentName, unsigned LineNum,
3917                                    unsigned Order) {
3918  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3919                                             "only required for the device "
3920                                             "code generation.");
3921  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3922      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3923                                   OMPTargetRegionEntryTargetRegion);
3924  ++OffloadingEntriesNum;
3925}
3926
/// Register a target region entry with its final address, ID and flags.
/// On the device side the entry must already have been initialized (from the
/// host-produced metadata); failing that an error diagnostic is emitted. On
/// the host side a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // A host/device mismatch (e.g. stale host metadata) is a hard error,
      // not an assertion: it can be triggered by user input.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host compilation: create the entry now, assigning it the next order
    // number so host and device agree on entry ordering.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3954
3955bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3956    unsigned DeviceID, unsigned FileID, StringRef ParentName,
3957    unsigned LineNum) const {
3958  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3959  if (PerDevice == OffloadEntriesTargetRegion.end())
3960    return false;
3961  auto PerFile = PerDevice->second.find(FileID);
3962  if (PerFile == PerDevice->second.end())
3963    return false;
3964  auto PerParentName = PerFile->second.find(ParentName);
3965  if (PerParentName == PerFile->second.end())
3966    return false;
3967  auto PerLine = PerParentName->second.find(LineNum);
3968  if (PerLine == PerParentName->second.end())
3969    return false;
3970  // Fail if this entry is already registered.
3971  if (PerLine->second.getAddress() || PerLine->second.getID())
3972    return false;
3973  return true;
3974}
3975
3976void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3977    const OffloadTargetRegionEntryInfoActTy &Action) {
3978  // Scan all target region entries and perform the provided action.
3979  for (const auto &D : OffloadEntriesTargetRegion)
3980    for (const auto &F : D.second)
3981      for (const auto &P : F.second)
3982        for (const auto &L : P.second)
3983          Action(D.first, F.first, P.first(), L.first, L.second);
3984}
3985
/// Initialize a device global variable entry by name. As with target region
/// entries, the address/size/linkage are filled in later when the variable is
/// registered.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // try_emplace: keep an existing entry for this name untouched if present.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3996
/// Register a device global variable entry with its address, size, flags and
/// linkage. On the device side the entry must have been pre-initialized; on
/// the host side it is created on first registration. A repeated registration
/// only updates size/linkage when the recorded size is still zero.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE: operator[] creates a default (invalid) entry if the name was
    // never initialized; the assert below catches that in +asserts builds.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only fill in a previously unknown size.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host compilation: a pre-existing entry is updated in place (size only
    // if previously unknown), otherwise a new one is created with the next
    // order number.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
4036
4037void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4038    actOnDeviceGlobalVarEntriesInfo(
4039        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4040  // Scan all target region entries and perform the provided action.
4041  for (const auto &E : OffloadEntriesDeviceGlobalVar)
4042    Action(E.getKey(), E.getValue());
4043}
4044
4045void CGOpenMPRuntime::createOffloadEntry(
4046    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4047    llvm::GlobalValue::LinkageTypes Linkage) {
4048  StringRef Name = Addr->getName();
4049  llvm::Module &M = CGM.getModule();
4050  llvm::LLVMContext &C = M.getContext();
4051
4052  // Create constant string with the name.
4053  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4054
4055  std::string StringName = getName({"omp_offloading", "entry_name"});
4056  auto *Str = new llvm::GlobalVariable(
4057      M, StrPtrInit->getType(), /*isConstant=*/true,
4058      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4059  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4060
4061  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4062                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4063                            llvm::ConstantInt::get(CGM.SizeTy, Size),
4064                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4065                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4066  std::string EntryName = getName({"omp_offloading", "entry", ""});
4067  llvm::GlobalVariable *Entry = createGlobalStruct(
4068      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4069      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4070
4071  // The entry has to be created in the section the linker expects it to be.
4072  Entry->setSection("omp_offloading_entries");
4073}
4074
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order so the device-side table matches the
  // host's; the SourceLocation and name are kept only for diagnostics below.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name per target-region entry, used to suppress
  // diagnostics for functions that were never emitted.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // (DeviceID, FileID) pair against the source manager's files.
        // NOTE: the loop-local iterator 'E' shadows the entry parameter 'E'
        // for the duration of the loop only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        // Global variables carry no usable SourceLocation here.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order, diagnosing invalid ones and emitting
  // a __tgt_offload_entry descriptor for each valid one.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions always have Size 0 (they are functions).
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // 'to' entries are not needed on the device under unified shared
        // memory.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // On the device a 'link' entry must not have an address; on the host
        // it must. (Note: message typo is preserved byte-for-byte.)
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the entry address doubles as the ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4248
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Host-side compilation does not consume this metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to mirror without a host IR file.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a local throw-away context; only the named
  // metadata node is read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  // Each operand is one entry; operand layout matches the emitter above.
  for (llvm::MDNode *MN : MD->operands()) {
    // Decode operand Idx as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Decode operand Idx as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4317
4318void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4319  if (!KmpRoutineEntryPtrTy) {
4320    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4321    ASTContext &C = CGM.getContext();
4322    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4323    FunctionProtoType::ExtProtoInfo EPI;
4324    KmpRoutineEntryPtrQTy = C.getPointerType(
4325        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4327  }
4328}
4329
4330QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4331  // Make sure the type of the entry is already created. This is the type we
4332  // have to create:
4333  // struct __tgt_offload_entry{
4334  //   void      *addr;       // Pointer to the offload entry info.
4335  //                          // (function or global)
4336  //   char      *name;       // Name of the function or global.
4337  //   size_t     size;       // Size of the entry info (0 if it a function).
4338  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4339  //   int32_t    reserved;   // Reserved, to use by the runtime library.
4340  // };
4341  if (TgtOffloadEntryQTy.isNull()) {
4342    ASTContext &C = CGM.getContext();
4343    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4344    RD->startDefinition();
4345    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4346    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4347    addFieldToRecordDecl(C, RD, C.getSizeType());
4348    addFieldToRecordDecl(
4349        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4350    addFieldToRecordDecl(
4351        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4352    RD->completeDefinition();
4353    RD->addAttr(PackedAttr::CreateImplicit(C));
4354    TgtOffloadEntryQTy = C.getRecordType(RD);
4355  }
4356  return TgtOffloadEntryQTy;
4357}
4358
namespace {
/// Describes how one variable is privatized in a task-based region:
/// the original (shared) declaration, its task-local copy, and the
/// element used to initialize that copy.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // The variable as written in the source.
  const VarDecl *Original;
  // The task-local private copy.
  const VarDecl *PrivateCopy;
  // Initialization element for the private copy; callers check it for null
  // before use, so it may be null when no per-element init is required.
  const VarDecl *PrivateElemInit;
};
// Pairs the required alignment of the private copy with its helper info.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
4371
4372static RecordDecl *
4373createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4374  if (!Privates.empty()) {
4375    ASTContext &C = CGM.getContext();
4376    // Build struct .kmp_privates_t. {
4377    //         /*  private vars  */
4378    //       };
4379    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4380    RD->startDefinition();
4381    for (const auto &Pair : Privates) {
4382      const VarDecl *VD = Pair.second.Original;
4383      QualType Type = VD->getType().getNonReferenceType();
4384      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4385      if (VD->hasAttrs()) {
4386        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4387             E(VD->getAttrs().end());
4388             I != E; ++I)
4389          FD->addAttr(*I);
4390      }
4391    }
4392    RD->completeDefinition();
4393    return RD;
4394  }
4395  return nullptr;
4396}
4397
4398static RecordDecl *
4399createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4400                         QualType KmpInt32Ty,
4401                         QualType KmpRoutineEntryPointerQTy) {
4402  ASTContext &C = CGM.getContext();
4403  // Build struct kmp_task_t {
4404  //         void *              shareds;
4405  //         kmp_routine_entry_t routine;
4406  //         kmp_int32           part_id;
4407  //         kmp_cmplrdata_t data1;
4408  //         kmp_cmplrdata_t data2;
4409  // For taskloops additional fields:
4410  //         kmp_uint64          lb;
4411  //         kmp_uint64          ub;
4412  //         kmp_int64           st;
4413  //         kmp_int32           liter;
4414  //         void *              reductions;
4415  //       };
4416  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4417  UD->startDefinition();
4418  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4419  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4420  UD->completeDefinition();
4421  QualType KmpCmplrdataTy = C.getRecordType(UD);
4422  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4423  RD->startDefinition();
4424  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4425  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4426  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4427  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4428  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4429  if (isOpenMPTaskLoopDirective(Kind)) {
4430    QualType KmpUInt64Ty =
4431        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4432    QualType KmpInt64Ty =
4433        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4434    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4435    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4436    addFieldToRecordDecl(C, RD, KmpInt64Ty);
4437    addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4439  }
4440  RD->completeDefinition();
4441  return RD;
4442}
4443
4444static RecordDecl *
4445createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4446                                     ArrayRef<PrivateDataTy> Privates) {
4447  ASTContext &C = CGM.getContext();
4448  // Build struct kmp_task_t_with_privates {
4449  //         kmp_task_t task_data;
4450  //         .kmp_privates_t. privates;
4451  //       };
4452  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4453  RD->startDefinition();
4454  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4455  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4456    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4457  RD->completeDefinition();
4458  return RD;
4459}
4460
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Arguments: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Name: <prefix>omp_task_entry.<suffix>.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  // Load the global thread id parameter.
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference 'tt' to get an lvalue for the whole task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the embedded kmp_task_t.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates if the privates field exists, otherwise a null void*.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Common argument prefix shared by task and taskloop outlined functions.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, loaded
    // from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4575
4576static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4577                                            SourceLocation Loc,
4578                                            QualType KmpInt32Ty,
4579                                            QualType KmpTaskTWithPrivatesPtrQTy,
4580                                            QualType KmpTaskTWithPrivatesQTy) {
4581  ASTContext &C = CGM.getContext();
4582  FunctionArgList Args;
4583  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4584                            ImplicitParamDecl::Other);
4585  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4586                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4587                                ImplicitParamDecl::Other);
4588  Args.push_back(&GtidArg);
4589  Args.push_back(&TaskTypeArg);
4590  const auto &DestructorFnInfo =
4591      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4592  llvm::FunctionType *DestructorFnTy =
4593      CGM.getTypes().GetFunctionType(DestructorFnInfo);
4594  std::string Name =
4595      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4596  auto *DestructorFn =
4597      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4598                             Name, &CGM.getModule());
4599  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4600                                    DestructorFnInfo);
4601  DestructorFn->setDoesNotRecurse();
4602  CodeGenFunction CGF(CGM);
4603  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4604                    Args, Loc, Loc);
4605
4606  LValue Base = CGF.EmitLoadOfPointerLValue(
4607      CGF.GetAddrOfLocalVar(&TaskTypeArg),
4608      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4609  const auto *KmpTaskTWithPrivatesQTyRD =
4610      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4611  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4612  Base = CGF.EmitLValueForField(Base, *FI);
4613  for (const auto *Field :
4614       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4615    if (QualType::DestructionKind DtorKind =
4616            Field->getType().isDestructedType()) {
4617      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4618      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4619    }
4620  }
4621  CGF.FinishFunction();
4622  return DestructorFn;
4623}
4624
4625/// Emit a privates mapping function for correct handling of private and
4626/// firstprivate variables.
4627/// \code
4628/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4629/// **noalias priv1,...,  <tyn> **noalias privn) {
4630///   *priv1 = &.privates.priv1;
4631///   ...;
4632///   *privn = &.privates.privn;
4633/// }
4634/// \endcode
4635static llvm::Value *
4636emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4637                               ArrayRef<const Expr *> PrivateVars,
4638                               ArrayRef<const Expr *> FirstprivateVars,
4639                               ArrayRef<const Expr *> LastprivateVars,
4640                               QualType PrivatesQTy,
4641                               ArrayRef<PrivateDataTy> Privates) {
4642  ASTContext &C = CGM.getContext();
4643  FunctionArgList Args;
4644  ImplicitParamDecl TaskPrivatesArg(
4645      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4647      ImplicitParamDecl::Other);
4648  Args.push_back(&TaskPrivatesArg);
4649  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4650  unsigned Counter = 1;
4651  for (const Expr *E : PrivateVars) {
4652    Args.push_back(ImplicitParamDecl::Create(
4653        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4654        C.getPointerType(C.getPointerType(E->getType()))
4655            .withConst()
4656            .withRestrict(),
4657        ImplicitParamDecl::Other));
4658    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4659    PrivateVarsPos[VD] = Counter;
4660    ++Counter;
4661  }
4662  for (const Expr *E : FirstprivateVars) {
4663    Args.push_back(ImplicitParamDecl::Create(
4664        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4665        C.getPointerType(C.getPointerType(E->getType()))
4666            .withConst()
4667            .withRestrict(),
4668        ImplicitParamDecl::Other));
4669    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670    PrivateVarsPos[VD] = Counter;
4671    ++Counter;
4672  }
4673  for (const Expr *E : LastprivateVars) {
4674    Args.push_back(ImplicitParamDecl::Create(
4675        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676        C.getPointerType(C.getPointerType(E->getType()))
4677            .withConst()
4678            .withRestrict(),
4679        ImplicitParamDecl::Other));
4680    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681    PrivateVarsPos[VD] = Counter;
4682    ++Counter;
4683  }
4684  const auto &TaskPrivatesMapFnInfo =
4685      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4686  llvm::FunctionType *TaskPrivatesMapTy =
4687      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4688  std::string Name =
4689      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4690  auto *TaskPrivatesMap = llvm::Function::Create(
4691      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4692      &CGM.getModule());
4693  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4694                                    TaskPrivatesMapFnInfo);
4695  if (CGM.getLangOpts().Optimize) {
4696    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4697    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4698    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4699  }
4700  CodeGenFunction CGF(CGM);
4701  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4702                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
4703
4704  // *privi = &.privates.privi;
4705  LValue Base = CGF.EmitLoadOfPointerLValue(
4706      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4707      TaskPrivatesArg.getType()->castAs<PointerType>());
4708  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4709  Counter = 0;
4710  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4711    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4712    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4713    LValue RefLVal =
4714        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4715    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4716        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4717    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4718    ++Counter;
4719  }
4720  CGF.FinishFunction();
4721  return TaskPrivatesMap;
4722}
4723
/// Emit initialization for private variables in task-based directives.
///
/// Fills the privates record of a kmp_task_t-based task object with the
/// initial values for the private, firstprivate and lastprivate copies.
/// \param KmpTaskSharedsPtr Address of the shareds block of the task; used as
/// the copy source for firstprivates. May be Address::invalid() when there is
/// nothing to copy from.
/// \param TDBase Base LValue of the kmp_task_t_with_privates object.
/// \param KmpTaskTWithPrivatesQTyRD Declaration of kmp_task_t_with_privates;
/// its second field is the privates record.
/// \param Privates Private copies, in the same (alignment-sorted) order as the
/// fields of the privates record.
/// \param ForDup true when called from the task duplication (task_dup)
/// function; in that mode only non-trivial C++ construction is re-emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Taskloop-family directives capture their statement under OMPD_taskloop;
  // everything else task-based uses the OMPD_task captured region.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lock-step with Privates; both
  // were built in the same order.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup function (ForDup) only re-run non-trivial C++
    // construction; trivially-copyable data was already copied at task
    // allocation time.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: copy/construct from the original shared variable.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target-data variable: not captured, addressed via its
          // local alloca directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Re-wrap with the original declaration's alignment so later loads
          // and stores use the correct (declared) alignment.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: privatize Elem to alias the shared
          // location, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4830
4831/// Check if duplication function is required for taskloops.
4832static bool checkInitIsRequired(CodeGenFunction &CGF,
4833                                ArrayRef<PrivateDataTy> Privates) {
4834  bool InitRequired = false;
4835  for (const PrivateDataTy &Pair : Privates) {
4836    const VarDecl *VD = Pair.second.PrivateCopy;
4837    const Expr *Init = VD->getAnyInitializer();
4838    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4839                                    !CGF.isTrivialInitializer(Init));
4840    if (InitRequired)
4841      break;
4842  }
4843  return InitRequired;
4844}
4845
4846
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true when lastprivates are present and the last_iter
/// flag of the destination task must be set from the third argument.
/// \return The generated internal-linkage task_dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the three implicit parameters: destination task, source task and
  // the lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied from the *source* task's shareds block.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  // ForDup=true: only non-trivial construction is re-run in the dup function.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4925
4926/// Checks if destructor function is required to be generated.
4927/// \return true if cleanups are required, false otherwise.
4928static bool
4929checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4930  bool NeedsCleanup = false;
4931  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4932  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4933  for (const FieldDecl *FD : PrivateRD->fields()) {
4934    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4935    if (NeedsCleanup)
4936      break;
4937  }
4938  return NeedsCleanup;
4939}
4940
// Allocates and initializes a kmp_task_t-based task object for a task-based
// directive: gathers and sorts private copies, builds the
// kmp_task_t_with_privates record, emits helper functions (privates map,
// proxy entry, optional task_dup/destructors), calls the runtime allocator
// and fills in shareds, privates, priority and destructor fields.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init variable used to
  // reference the original value during initialization.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment (stable, to keep relative order of
  // equally-aligned copies) so the privates record has no padding waste.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). The record layout differs for
  // taskloop-family directives, so the two variants are cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map function is the 4th parameter of the outlined task
  // function; match its type exactly.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // 'final' may be a runtime expression (select at runtime) or a constant
  // known at compile time (getInt()).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup helper when there are lastprivates or any
    // private requires non-trivial construction on duplication.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5160
// Emits code for the 'task' directive: allocates/initializes the task via
// emitTaskInit, builds the dependence array (if any 'depend' clauses are
// present), then schedules the task through the runtime. Under an 'if'
// clause the else-branch waits on dependences and runs the task immediately
// (undeferred) between __kmpc_omp_task_begin_if0/_complete_if0 calls.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build and cache the kmp_depend_info record on first use.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size = (upper bound + 1 element) - lower bound,
        // computed on integer-converted pointers.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointing at its first element for the
    // runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch ('if' clause true or absent): schedule the task through the
  // runtime, with or without dependences.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch ('if' clause false): wait on dependences, then execute the
  // task body immediately (undeferred) on the current thread.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5355
/// Emits a call to the '__kmpc_taskloop' runtime entry point for a 'taskloop'
/// directive: the task object is allocated and initialized first (via
/// emitTaskInit), then its lower-bound/upper-bound/stride fields are filled
/// from the directive's captured loop-bound variables before the libcall.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate the task object and emit code that copies shareds/privates and
  // builds the task-entry and (optional) task-dup functions.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause condition is passed to the runtime as an int flag; a
  // missing clause is equivalent to if(true), i.e. if_val == 1.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the kmp_task_t lower-bound field from the initializer of the
  // directive's captured lower-bound variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper-bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the stride field; its value is reloaded below since the
  // runtime takes the stride by value, not by pointer.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address (set up elsewhere for task reductions), or null
  // when there is no reduction data.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Encoding of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, or 0 when no schedule clause was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5439
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param CGF Code generation function in which the element loop is emitted.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by the atomic-reduction path; null otherwise).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (possible for VLAs).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // combiner generated by RedOpGen operates on a single element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edges may come from a block emitted by RedOpGen, hence
  // GetInsertBlock() rather than BodyBB.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5519
5520/// Emit reduction combiner. If the combiner is a simple expression emit it as
5521/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5522/// UDR combiner function.
5523static void emitReductionCombiner(CodeGenFunction &CGF,
5524                                  const Expr *ReductionOp) {
5525  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5526    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5527      if (const auto *DRE =
5528              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5529        if (const auto *DRD =
5530                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5531          std::pair<llvm::Function *, llvm::Function *> Reduction =
5532              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5533          RValue Func = RValue::get(Reduction.first);
5534          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5535          CGF.EmitIgnoredExpr(ReductionOp);
5536          return;
5537        }
5538  CGF.EmitIgnoredExpr(ReductionOp);
5539}
5540
/// Emits the outlined 'void reduction_func(void *LHSArg, void *RHSArg)' that
/// is passed to __kmpc_reduce{_nowait}: both arguments point to arrays of
/// void* holding the addresses of the reduction items, and the function
/// applies each reduction operation, storing results through the LHS slots.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // slot of the argument arrays. Note that Idx can run ahead of I: a
  // variably-modified item consumes an extra slot holding its array size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size was passed through the array as a pointer-sized integer;
      // bind it to the VLA's opaque size expression before emitting the type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5632
5633void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5634                                                  const Expr *ReductionOp,
5635                                                  const Expr *PrivateRef,
5636                                                  const DeclRefExpr *LHS,
5637                                                  const DeclRefExpr *RHS) {
5638  if (PrivateRef->getType()->isArrayType()) {
5639    // Emit reduction for array section.
5640    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5641    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5642    EmitOMPAggregateReduction(
5643        CGF, PrivateRef->getType(), LHSVar, RHSVar,
5644        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5645          emitReductionCombiner(CGF, ReductionOp);
5646        });
5647  } else {
5648    // Emit reduction for array subscript or single variable.
5649    emitReductionCombiner(CGF, ReductionOp);
5650  }
5651}
5652
/// Emits code for the 'reduction' clause of a directive: builds the RedList
/// array of reduction-item addresses, the outlined reduce_func, and the
/// __kmpc_reduce{_nowait} call with its tree-reduce (case 1) and atomic
/// (case 2) arms; or just the plain combiners when SimpleReduction is set.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime interaction needed: emit the combiners inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size travels through the void* array as an int-to-pointer value;
      // reduction_func converts it back with ptrtoint.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy ensures __kmpc_end_reduce{_nowait} is called on exit.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Peel 'x = <update>' apart; note the inner 'BO' intentionally shadows
      // the BinaryOperatorKind above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // When a simple atomic is impossible, the update is emitted
                // with the loaded X value stored into a private LHS temp.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5956
5957/// Generates unique name for artificial threadprivate variables.
5958/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5959static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5960                                      const Expr *Ref) {
5961  SmallString<256> Buffer;
5962  llvm::raw_svector_ostream Out(Buffer);
5963  const clang::DeclRefExpr *DE;
5964  const VarDecl *D = ::getBaseDecl(Ref, DE);
5965  if (!D)
5966    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5967  D = D->getCanonicalDecl();
5968  std::string Name = CGM.getOpenMPRuntime().getName(
5969      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5970  Out << Prefix << Name << "_"
5971      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5972  return Out.str();
5973}
5974
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points to the private copy of the reduction item.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // The initializer does not reference the original item; pass null.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6041
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6119
6120/// Emits reduction finalizer function:
6121/// \code
6122/// void @.red_fini(void* %arg) {
6123/// %0 = bitcast void* %arg to <type>*
6124/// <destroy>(<type>* %0)
6125/// ret void
6126/// }
6127/// \endcode
6128static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6129                                           SourceLocation Loc,
6130                                           ReductionCodeGen &RCG, unsigned N) {
6131  if (!RCG.needCleanups(N))
6132    return nullptr;
6133  ASTContext &C = CGM.getContext();
6134  FunctionArgList Args;
6135  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6136                          ImplicitParamDecl::Other);
6137  Args.emplace_back(&Param);
6138  const auto &FnInfo =
6139      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6140  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6141  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6142  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6143                                    Name, &CGM.getModule());
6144  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6145  Fn->setDoesNotRecurse();
6146  CodeGenFunction CGF(CGM);
6147  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6148  Address PrivateAddr = CGF.EmitLoadOfPointer(
6149      CGF.GetAddrOfLocalVar(&Param),
6150      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6151  llvm::Value *Size = nullptr;
6152  // If the size of the reduction item is non-constant, load it from global
6153  // threadprivate variable.
6154  if (RCG.getSizes(N).second) {
6155    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6156        CGF, CGM.getContext().getSizeType(),
6157        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6159                                CGM.getContext().getSizeType(), Loc);
6160  }
6161  RCG.emitAggregateType(CGF, N, Size);
6162  // Emit the finalizer body:
6163  // <destroy>(<type>* %0)
6164  RCG.emitCleanups(CGF, N, PrivateAddr);
6165  CGF.FinishFunction(Loc);
6166  return Fn;
6167}
6168
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size of the item in chars>;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups,
    // since emitReduceFiniFunction returns nullptr in that case).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 if delayed creation is required, 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data); the call's result is returned to the caller.
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6274
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the reduction item is
  // non-constant (Sizes.second != nullptr). The runtime cannot pass dynamic
  // sizes to the generated init/comb/fini functions, so they re-load the size
  // from this artificial threadprivate variable instead.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  // The runtime does not pass the original item's pointer to the initializer,
  // so it is communicated through this threadprivate slot as well.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6301
6302Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6303                                              SourceLocation Loc,
6304                                              llvm::Value *ReductionsPtr,
6305                                              LValue SharedLVal) {
6306  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6307  // *d);
6308  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6309                                                   CGM.IntTy,
6310                                                   /*isSigned=*/true),
6311                         ReductionsPtr,
6312                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6313                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6314  return Address(
6315      CGF.EmitRuntimeCall(
6316          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6317      SharedLVal.getAlignment());
6318}
6319
6320void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6321                                       SourceLocation Loc) {
6322  if (!CGF.HaveInsertPoint())
6323    return;
6324  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6325  // global_tid);
6326  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6327  // Ignore return result until untied tasks are supported.
6328  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6329  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6330    Region->emitUntiedSwitch(CGF);
6331}
6332
6333void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6334                                           OpenMPDirectiveKind InnerKind,
6335                                           const RegionCodeGenTy &CodeGen,
6336                                           bool HasCancel) {
6337  if (!CGF.HaveInsertPoint())
6338    return;
6339  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6340  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6341}
6342
namespace {
/// Cancellation-kind values passed as the kmp_int32 'cncl_kind' argument to
/// the __kmpc_cancel/__kmpc_cancellationpoint runtime calls below.
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // cancel a 'parallel' region
  CancelLoop = 2,       // cancel a worksharing loop ('for')
  CancelSections = 3,   // cancel a 'sections' region
  CancelTaskgroup = 4   // cancel a 'taskgroup' region
};
} // anonymous namespace
6352
6353static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6354  RTCancelKind CancelKind = CancelNoreq;
6355  if (CancelRegion == OMPD_parallel)
6356    CancelKind = CancelParallel;
6357  else if (CancelRegion == OMPD_for)
6358    CancelKind = CancelLoop;
6359  else if (CancelRegion == OMPD_sections)
6360    CancelKind = CancelSections;
6361  else {
6362    assert(CancelRegion == OMPD_taskgroup);
6363    CancelKind = CancelTaskgroup;
6364  }
6365  return CancelKind;
6366}
6367
/// Emits a '#pragma omp cancellation point': calls __kmpc_cancellationpoint
/// and, when the result is non-zero, branches out of the enclosing construct
/// through its cleanup destination.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  // No insertion point - nothing to emit.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // Lower the result check as:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; the branch runs any pending cleanups on the way.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6402
/// Emits a '#pragma omp cancel': calls __kmpc_cancel (guarded by the 'if'
/// clause condition \p IfCond when present) and, when the result is non-zero,
/// branches out of the enclosing construct through its cleanup destination.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // No insertion point - nothing to emit.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Capture by value/pointer: the lambda may be emitted later under the
    // 'if' clause, after this frame's locals would otherwise be gone.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // Lower the result check as:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct; the branch runs any pending cleanups on the way.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Conditional cancel: only the 'then' branch does work; 'else' is empty.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6444
/// Outlines the body of a target directive into \p OutlinedFn and (when
/// \p IsOffloadEntry) produces an offload entry ID in \p OutlinedFnID.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Record that this module emitted at least one target region before
  // delegating the actual outlining work to the shared helper.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6454
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement as a function, using target-region capture
  // info so the body is emitted through \p CodeGen.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a dedicated 1-byte global, unique per entry.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6521
6522/// Checks if the expression is constant or does not have non-trivial function
6523/// calls.
6524static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6525  // We can skip constant expressions.
6526  // We can skip expressions with trivial calls or simple expressions.
6527  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6528          !E->hasNonTrivialCall(Ctx)) &&
6529         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6530}
6531
/// Peels nested compound statements off \p Body, skipping trivial expressions
/// and ignorable statements/declarations, and returns the single remaining
/// child statement — or nullptr when more than one non-trivial child exists.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the sole candidate child is itself a compound stmt.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Constant or otherwise trivial expressions do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declarations of these kinds are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if constexpr, or of trivial/reference
              // type with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6576
6577/// Emit the number of teams for a target directive.  Inspect the num_teams
6578/// clause associated with a teams construct combined or closely nested
6579/// with the target directive.
6580///
6581/// Emit a team of size one for directives such as 'target parallel' that
6582/// have no associated teams construct.
6583///
6584/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the (single) nested directive, if any,
    // to determine the number of teams.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams with an explicit num_teams clause: emit its value.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams: emit 0 (no compile-time value).
        return Bld.getInt32(0);
      }
      // Nested parallel or simd: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Could not analyze the nested statement: unknown.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: read num_teams from the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and cannot reach here
  // (guarded by the assert above); listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6703
/// Computes the number of threads for the directive nested directly inside
/// \p CS, folding in its 'if' and 'num_threads' clauses and clamping against
/// \p DefaultThreadLimitVal (the smaller of the two is used) when provided.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the 'if' clause that applies to 'parallel' (or is unmodified).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant false: a single thread runs.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Emit the clause's pre-init declarations, then the condition.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Allocate without emitting the initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Allocate without emitting the initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default limit: min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd directive executes on a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No analyzable nested directive: default limit, or 0 when none was given.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6795
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' directive: derive the thread count from whatever
    // construct is nested inside its captured statement.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // If the nested directive carries a thread_limit clause, emit its
      // expression inside a lexical scope, first materializing any pre-init
      // declarations the clause expression depends on.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // OMPCaptureNoInit declarations are allocated (with cleanups)
              // but deliberately left uninitialized here.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more level
      // to find the construct that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region is executed with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the emitted thread_limit, or 0 when none was found.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit may appear directly on the combined directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Look through the captured statement for a nested 'distribute' region.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    // Emit thread_limit (if any), then let the nested region decide.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the 'if' clause that applies to the parallel region: either an
      // unmodified one or one with the 'parallel' directive-name modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: the region runs with one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          // Non-constant condition: evaluate it now and select at the end.
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // When both clauses are present the result is the unsigned minimum of
      // num_threads and thread_limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd regions are executed with a single thread.
    return Bld.getInt32(1);
  // The remaining kinds are not target execution directives; the assert at
  // the top of the function rules them out.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7014
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7064
7065  /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066  static unsigned getFlagMemberOffset() {
7067    unsigned Offset = 0;
7068    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069         Remain = Remain >> 1)
7070      Offset++;
7071    return Offset;
7072  }
7073
7074  /// Class that associates information with a base pointer to be passed to the
7075  /// runtime library.
7076  class BasePointerInfo {
7077    /// The base pointer.
7078    llvm::Value *Ptr = nullptr;
7079    /// The base declaration that refers to this device pointer, or null if
7080    /// there is none.
7081    const ValueDecl *DevPtrDecl = nullptr;
7082
7083  public:
7084    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7085        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7086    llvm::Value *operator*() const { return Ptr; }
7087    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7088    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7089  };
7090
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Lowest mapped field: its index within the struct, and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: its index within the struct, and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct itself (presumably the start of the object —
    // set by whoever fills in this record; confirm against callers).
    Address Base = Address::invalid();
  };
7106
private:
  /// Information gathered for a single mappable-expression component list:
  /// the components themselves plus the map type and modifiers they are
  /// mapped with.
  struct MapInfo {
    /// Components of the mappable expression, from base to final element.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the map clause ('to', 'from', 'tofrom', ...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers present on the map clause (e.g. 'always', 'close').
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime has to return the device pointer for this entry
    /// (use_device_ptr handling; see OMP_MAP_RETURN_PARAM).
    bool ReturnDevicePointer = false;
    /// Marks the map as implicit (sets OMP_MAP_IMPLICIT in the flags).
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7125
7126  /// If use_device_ptr is used on a pointer which is a struct member and there
7127  /// is no map information about it, then emission of that entry is deferred
7128  /// until the whole struct has been processed.
7129  struct DeferredDevicePtrEntryTy {
7130    const Expr *IE = nullptr;
7131    const ValueDecl *VD = nullptr;
7132
7133    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7134        : IE(IE), VD(VD) {}
7135  };
7136
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7157
7158  llvm::Value *getExprTypeSize(const Expr *E) const {
7159    QualType ExprTy = E->getType().getCanonicalType();
7160
7161    // Reference types are ignored for mapping purposes.
7162    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7163      ExprTy = RefTy->getPointeeType().getCanonicalType();
7164
7165    // Given that an array section is considered a built-in type, we need to
7166    // do the calculation based on the length of the section instead of relying
7167    // on CGF.getTypeSize(E->getType()).
7168    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7169      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7170                            OAE->getBase()->IgnoreParenImpCasts())
7171                            .getCanonicalType();
7172
7173      // If there is no length associated with the expression and lower bound is
7174      // not specified too, that means we are using the whole length of the
7175      // base.
7176      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7177          !OAE->getLowerBound())
7178        return CGF.getTypeSize(BaseTy);
7179
7180      llvm::Value *ElemSize;
7181      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7182        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7183      } else {
7184        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7185        assert(ATy && "Expecting array type if not a pointer type.");
7186        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7187      }
7188
7189      // If we don't have a length at this point, that is because we have an
7190      // array section with a single element.
7191      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7192        return ElemSize;
7193
7194      if (const Expr *LenExpr = OAE->getLength()) {
7195        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7196        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7197                                             CGF.getContext().getSizeType(),
7198                                             LenExpr->getExprLoc());
7199        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7200      }
7201      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7202             OAE->getLowerBound() && "expected array_section[lb:].");
7203      // Size = sizetype - lb * elemtype;
7204      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7205      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7206      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7207                                       CGF.getContext().getSizeType(),
7208                                       OAE->getLowerBound()->getExprLoc());
7209      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7210      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7211      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7212      LengthVal = CGF.Builder.CreateSelect(
7213          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7214      return LengthVal;
7215    }
7216    return CGF.getTypeSize(ExprTy);
7217  }
7218
7219  /// Return the corresponding bits for a given map clause modifier. Add
7220  /// a flag marking the map as a pointer if requested. Add a flag marking the
7221  /// map as the first one of a series of maps that relate to the same map
7222  /// expression.
7223  OpenMPOffloadMappingFlags getMapTypeBits(
7224      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7226    OpenMPOffloadMappingFlags Bits =
7227        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7228    switch (MapType) {
7229    case OMPC_MAP_alloc:
7230    case OMPC_MAP_release:
7231      // alloc and release is the default behavior in the runtime library,  i.e.
7232      // if we don't pass any bits alloc/release that is what the runtime is
7233      // going to do. Therefore, we don't need to signal anything for these two
7234      // type modifiers.
7235      break;
7236    case OMPC_MAP_to:
7237      Bits |= OMP_MAP_TO;
7238      break;
7239    case OMPC_MAP_from:
7240      Bits |= OMP_MAP_FROM;
7241      break;
7242    case OMPC_MAP_tofrom:
7243      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7244      break;
7245    case OMPC_MAP_delete:
7246      Bits |= OMP_MAP_DELETE;
7247      break;
7248    case OMPC_MAP_unknown:
7249      llvm_unreachable("Unexpected map type!");
7250    }
7251    if (AddPtrFlag)
7252      Bits |= OMP_MAP_PTR_AND_OBJ;
7253    if (AddIsTargetParamFlag)
7254      Bits |= OMP_MAP_TARGET_PARAM;
7255    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7256        != MapModifiers.end())
7257      Bits |= OMP_MAP_ALWAYS;
7258    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7259        != MapModifiers.end())
7260      Bits |= OMP_MAP_CLOSE;
7261    return Bits;
7262  }
7263
7264  /// Return true if the provided expression is a final array section. A
7265  /// final array section, is one whose length can't be proved to be one.
7266  bool isFinalArraySectionExpression(const Expr *E) const {
7267    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7268
7269    // It is not an array section and therefore not a unity-size one.
7270    if (!OASE)
7271      return false;
7272
7273    // An array section with no colon always refer to a single element.
7274    if (OASE->getColonLoc().isInvalid())
7275      return false;
7276
7277    const Expr *Length = OASE->getLength();
7278
7279    // If we don't have a length we have to check if the array has size 1
7280    // for this dimension. Also, we should always expect a length if the
7281    // base type is pointer.
7282    if (!Length) {
7283      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7284                             OASE->getBase()->IgnoreParenImpCasts())
7285                             .getCanonicalType();
7286      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7287        return ATy->getSize().getSExtValue() != 1;
7288      // If we don't have a constant dimension length, we have to consider
7289      // the current section as having any size, so it is not necessarily
7290      // unitary. If it happen to be unity size, that's user fault.
7291      return true;
7292    }
7293
7294    // Check if the length evaluates to 1.
7295    Expr::EvalResult Result;
7296    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7297      return true; // Can have more that size 1.
7298
7299    llvm::APSInt ConstLength = Result.Val.getInt();
7300    return ConstLength.getSExtValue() != 1;
7301  }
7302
7303  /// Generate the base pointers, section pointers, sizes and map type
7304  /// bits for the provided map type, map modifier, and expression components.
7305  /// \a IsFirstComponent should be set to true if the provided set of
7306  /// components is the first associated with a capture.
7307  void generateInfoForComponentList(
7308      OpenMPMapClauseKind MapType,
7309      ArrayRef<OpenMPMapModifierKind> MapModifiers,
7310      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7311      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7312      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7313      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7314      bool IsImplicit,
7315      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7316          OverlappedElements = llvm::None) const {
7317    // The following summarizes what has to be generated for each map and the
7318    // types below. The generated information is expressed in this order:
7319    // base pointer, section pointer, size, flags
7320    // (to add to the ones that come from the map type and modifier).
7321    //
7322    // double d;
7323    // int i[100];
7324    // float *p;
7325    //
7326    // struct S1 {
7327    //   int i;
7328    //   float f[50];
7329    // }
7330    // struct S2 {
7331    //   int i;
7332    //   float f[50];
7333    //   S1 s;
7334    //   double *p;
7335    //   struct S2 *ps;
7336    // }
7337    // S2 s;
7338    // S2 *ps;
7339    //
7340    // map(d)
7341    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7342    //
7343    // map(i)
7344    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7345    //
7346    // map(i[1:23])
7347    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7348    //
7349    // map(p)
7350    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7351    //
7352    // map(p[1:24])
7353    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7354    //
7355    // map(s)
7356    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7357    //
7358    // map(s.i)
7359    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7360    //
7361    // map(s.s.f)
7362    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7363    //
7364    // map(s.p)
7365    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7366    //
7367    // map(to: s.p[:22])
7368    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7369    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7370    // &(s.p), &(s.p[0]), 22*sizeof(double),
7371    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7372    // (*) alloc space for struct members, only this is a target parameter
7373    // (**) map the pointer (nothing to be mapped in this example) (the compiler
7374    //      optimizes this entry out, same in the examples below)
7375    // (***) map the pointee (map: to)
7376    //
7377    // map(s.ps)
7378    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7379    //
7380    // map(from: s.ps->s.i)
7381    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7382    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7383    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7384    //
7385    // map(to: s.ps->ps)
7386    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7389    //
7390    // map(s.ps->ps->ps)
7391    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7392    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7393    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7394    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7395    //
7396    // map(to: s.ps->ps->s.f[:22])
7397    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7398    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7399    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7400    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7401    //
7402    // map(ps)
7403    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7404    //
7405    // map(ps->i)
7406    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7407    //
7408    // map(ps->s.f)
7409    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7410    //
7411    // map(from: ps->p)
7412    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7413    //
7414    // map(to: ps->p[:22])
7415    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7416    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7417    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7418    //
7419    // map(ps->ps)
7420    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7421    //
7422    // map(from: ps->ps->s.i)
7423    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7424    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7425    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7426    //
7427    // map(from: ps->ps->ps)
7428    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7431    //
7432    // map(ps->ps->ps->ps)
7433    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7434    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7435    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7436    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7437    //
7438    // map(to: ps->ps->ps->s.f[:22])
7439    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7440    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7441    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7442    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7443    //
7444    // map(to: s.f[:22]) map(from: s.p[:33])
7445    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7446    //     sizeof(double*) (**), TARGET_PARAM
7447    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7448    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7449    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7450    // (*) allocate contiguous space needed to fit all mapped members even if
7451    //     we allocate space for members not mapped (in this example,
7452    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7453    //     them as well because they fall between &s.f[0] and &s.p)
7454    //
7455    // map(from: s.f[:22]) map(to: ps->p[:33])
7456    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7457    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7458    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7459    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7460    // (*) the struct this entry pertains to is the 2nd element in the list of
7461    //     arguments, hence MEMBER_OF(2)
7462    //
7463    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7464    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7465    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7466    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7467    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7468    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7469    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7470    // (*) the struct this entry pertains to is the 4th element in the list
7471    //     of arguments, hence MEMBER_OF(4)
7472
7473    // Track if the map information being generated is the first for a capture.
7474    bool IsCaptureFirstInfo = IsFirstComponentList;
7475    // When the variable is on a declare target link or in a to clause with
7476    // unified memory, a reference is needed to hold the host/device address
7477    // of the variable.
7478    bool RequiresReference = false;
7479
7480    // Scan the components from the base to the complete expression.
7481    auto CI = Components.rbegin();
7482    auto CE = Components.rend();
7483    auto I = CI;
7484
7485    // Track if the map information being generated is the first for a list of
7486    // components.
7487    bool IsExpressionFirstInfo = true;
7488    Address BP = Address::invalid();
7489    const Expr *AssocExpr = I->getAssociatedExpression();
7490    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7491    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7492
7493    if (isa<MemberExpr>(AssocExpr)) {
7494      // The base is the 'this' pointer. The content of the pointer is going
7495      // to be the base of the field being mapped.
7496      BP = CGF.LoadCXXThisAddress();
7497    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7498               (OASE &&
7499                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7500      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7501    } else {
7502      // The base is the reference to the variable.
7503      // BP = &Var.
7504      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7505      if (const auto *VD =
7506              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7507        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7508                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7509          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7510              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7511               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7512            RequiresReference = true;
7513            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7514          }
7515        }
7516      }
7517
7518      // If the variable is a pointer and is being dereferenced (i.e. is not
7519      // the last component), the base has to be the pointer itself, not its
7520      // reference. References are ignored for mapping purposes.
7521      QualType Ty =
7522          I->getAssociatedDeclaration()->getType().getNonReferenceType();
7523      if (Ty->isAnyPointerType() && std::next(I) != CE) {
7524        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7525
7526        // We do not need to generate individual map information for the
7527        // pointer, it can be associated with the combined storage.
7528        ++I;
7529      }
7530    }
7531
7532    // Track whether a component of the list should be marked as MEMBER_OF some
7533    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7534    // in a component list should be marked as MEMBER_OF, all subsequent entries
7535    // do not belong to the base struct. E.g.
7536    // struct S2 s;
7537    // s.ps->ps->ps->f[:]
7538    //   (1) (2) (3) (4)
7539    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7540    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7541    // is the pointee of ps(2) which is not member of struct s, so it should not
7542    // be marked as such (it is still PTR_AND_OBJ).
7543    // The variable is initialized to false so that PTR_AND_OBJ entries which
7544    // are not struct members are not considered (e.g. array of pointers to
7545    // data).
7546    bool ShouldBeMemberOf = false;
7547
7548    // Variable keeping track of whether or not we have encountered a component
7549    // in the component list which is a member expression. Useful when we have a
7550    // pointer or a final array section, in which case it is the previous
7551    // component in the list which tells us whether we have a member expression.
7552    // E.g. X.f[:]
7553    // While processing the final array section "[:]" it is "f" which tells us
7554    // whether we are dealing with a member of a declared struct.
7555    const MemberExpr *EncounteredME = nullptr;
7556
7557    for (; I != CE; ++I) {
7558      // If the current component is member of a struct (parent struct) mark it.
7559      if (!EncounteredME) {
7560        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7561        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7562        // as MEMBER_OF the parent struct.
7563        if (EncounteredME)
7564          ShouldBeMemberOf = true;
7565      }
7566
7567      auto Next = std::next(I);
7568
7569      // We need to generate the addresses and sizes if this is the last
7570      // component, if the component is a pointer or if it is an array section
7571      // whose length can't be proved to be one. If this is a pointer, it
7572      // becomes the base address for the following components.
7573
7574      // A final array section, is one whose length can't be proved to be one.
7575      bool IsFinalArraySection =
7576          isFinalArraySectionExpression(I->getAssociatedExpression());
7577
7578      // Get information on whether the element is a pointer. Have to do a
7579      // special treatment for array sections given that they are built-in
7580      // types.
7581      const auto *OASE =
7582          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7583      bool IsPointer =
7584          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7585                       .getCanonicalType()
7586                       ->isAnyPointerType()) ||
7587          I->getAssociatedExpression()->getType()->isAnyPointerType();
7588
7589      if (Next == CE || IsPointer || IsFinalArraySection) {
7590        // If this is not the last component, we expect the pointer to be
7591        // associated with an array expression or member expression.
7592        assert((Next == CE ||
7593                isa<MemberExpr>(Next->getAssociatedExpression()) ||
7594                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7595                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7596               "Unexpected expression");
7597
7598        Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7599                         .getAddress(CGF);
7600
7601        // If this component is a pointer inside the base struct then we don't
7602        // need to create any entry for it - it will be combined with the object
7603        // it is pointing to into a single PTR_AND_OBJ entry.
7604        bool IsMemberPointer =
7605            IsPointer && EncounteredME &&
7606            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7607             EncounteredME);
7608        if (!OverlappedElements.empty()) {
7609          // Handle base element with the info for overlapped elements.
7610          assert(!PartialStruct.Base.isValid() && "The base element is set.");
7611          assert(Next == CE &&
7612                 "Expected last element for the overlapped elements.");
7613          assert(!IsPointer &&
7614                 "Unexpected base element with the pointer type.");
7615          // Mark the whole struct as the struct that requires allocation on the
7616          // device.
7617          PartialStruct.LowestElem = {0, LB};
7618          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7619              I->getAssociatedExpression()->getType());
7620          Address HB = CGF.Builder.CreateConstGEP(
7621              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7622                                                              CGF.VoidPtrTy),
7623              TypeSize.getQuantity() - 1);
7624          PartialStruct.HighestElem = {
7625              std::numeric_limits<decltype(
7626                  PartialStruct.HighestElem.first)>::max(),
7627              HB};
7628          PartialStruct.Base = BP;
7629          // Emit data for non-overlapped data.
7630          OpenMPOffloadMappingFlags Flags =
7631              OMP_MAP_MEMBER_OF |
7632              getMapTypeBits(MapType, MapModifiers, IsImplicit,
7633                             /*AddPtrFlag=*/false,
7634                             /*AddIsTargetParamFlag=*/false);
7635          LB = BP;
7636          llvm::Value *Size = nullptr;
7637          // Do bitcopy of all non-overlapped structure elements.
7638          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7639                   Component : OverlappedElements) {
7640            Address ComponentLB = Address::invalid();
7641            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7642                 Component) {
7643              if (MC.getAssociatedDeclaration()) {
7644                ComponentLB =
7645                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7646                        .getAddress(CGF);
7647                Size = CGF.Builder.CreatePtrDiff(
7648                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7649                    CGF.EmitCastToVoidPtr(LB.getPointer()));
7650                break;
7651              }
7652            }
7653            BasePointers.push_back(BP.getPointer());
7654            Pointers.push_back(LB.getPointer());
7655            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7656                                                      /*isSigned=*/true));
7657            Types.push_back(Flags);
7658            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7659          }
7660          BasePointers.push_back(BP.getPointer());
7661          Pointers.push_back(LB.getPointer());
7662          Size = CGF.Builder.CreatePtrDiff(
7663              CGF.EmitCastToVoidPtr(
7664                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7665              CGF.EmitCastToVoidPtr(LB.getPointer()));
7666          Sizes.push_back(
7667              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7668          Types.push_back(Flags);
7669          break;
7670        }
7671        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7672        if (!IsMemberPointer) {
7673          BasePointers.push_back(BP.getPointer());
7674          Pointers.push_back(LB.getPointer());
7675          Sizes.push_back(
7676              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7677
7678          // We need to add a pointer flag for each map that comes from the
7679          // same expression except for the first one. We also need to signal
7680          // this map is the first one that relates with the current capture
7681          // (there is a set of entries for each capture).
7682          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7683              MapType, MapModifiers, IsImplicit,
7684              !IsExpressionFirstInfo || RequiresReference,
7685              IsCaptureFirstInfo && !RequiresReference);
7686
7687          if (!IsExpressionFirstInfo) {
7688            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7689            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7690            if (IsPointer)
7691              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7692                         OMP_MAP_DELETE | OMP_MAP_CLOSE);
7693
7694            if (ShouldBeMemberOf) {
7695              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696              // should be later updated with the correct value of MEMBER_OF.
7697              Flags |= OMP_MAP_MEMBER_OF;
7698              // From now on, all subsequent PTR_AND_OBJ entries should not be
7699              // marked as MEMBER_OF.
7700              ShouldBeMemberOf = false;
7701            }
7702          }
7703
7704          Types.push_back(Flags);
7705        }
7706
7707        // If we have encountered a member expression so far, keep track of the
7708        // mapped member. If the parent is "*this", then the value declaration
7709        // is nullptr.
7710        if (EncounteredME) {
7711          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7712          unsigned FieldIndex = FD->getFieldIndex();
7713
7714          // Update info about the lowest and highest elements for this struct
7715          if (!PartialStruct.Base.isValid()) {
7716            PartialStruct.LowestElem = {FieldIndex, LB};
7717            PartialStruct.HighestElem = {FieldIndex, LB};
7718            PartialStruct.Base = BP;
7719          } else if (FieldIndex < PartialStruct.LowestElem.first) {
7720            PartialStruct.LowestElem = {FieldIndex, LB};
7721          } else if (FieldIndex > PartialStruct.HighestElem.first) {
7722            PartialStruct.HighestElem = {FieldIndex, LB};
7723          }
7724        }
7725
7726        // If we have a final array section, we are done with this expression.
7727        if (IsFinalArraySection)
7728          break;
7729
7730        // The pointer becomes the base for the next element.
7731        if (Next != CE)
7732          BP = LB;
7733
7734        IsExpressionFirstInfo = false;
7735        IsCaptureFirstInfo = false;
7736      }
7737    }
7738  }
7739
7740  /// Return the adjusted map modifiers if the declaration a capture refers to
7741  /// appears in a first-private clause. This is expected to be used only with
7742  /// directives that start with 'target'.
7743  MappableExprsHandler::OpenMPOffloadMappingFlags
7744  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7745    assert(Cap.capturesVariable() && "Expected capture by reference only!");
7746
7747    // A first private variable captured by reference will use only the
7748    // 'private ptr' and 'map to' flag. Return the right flags if the captured
7749    // declaration is known as first-private in this handler.
7750    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7751      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7752          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7753        return MappableExprsHandler::OMP_MAP_ALWAYS |
7754               MappableExprsHandler::OMP_MAP_TO;
7755      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7756        return MappableExprsHandler::OMP_MAP_TO |
7757               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7758      return MappableExprsHandler::OMP_MAP_PRIVATE |
7759             MappableExprsHandler::OMP_MAP_TO;
7760    }
7761    return MappableExprsHandler::OMP_MAP_TO |
7762           MappableExprsHandler::OMP_MAP_FROM;
7763  }
7764
7765  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7766    // Rotate by getFlagMemberOffset() bits.
7767    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7768                                                  << getFlagMemberOffset());
7769  }
7770
7771  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7772                                     OpenMPOffloadMappingFlags MemberOfFlag) {
7773    // If the entry is PTR_AND_OBJ but has not been marked with the special
7774    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7775    // marked as MEMBER_OF.
7776    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7777        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7778      return;
7779
7780    // Reset the placeholder value to prepare the flag for the assignment of the
7781    // proper MEMBER_OF value.
7782    Flags &= ~OMP_MAP_MEMBER_OF;
7783    Flags |= MemberOfFlag;
7784  }
7785
7786  void getPlainLayout(const CXXRecordDecl *RD,
7787                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7788                      bool AsBase) const {
7789    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7790
7791    llvm::StructType *St =
7792        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7793
7794    unsigned NumElements = St->getNumElements();
7795    llvm::SmallVector<
7796        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7797        RecordLayout(NumElements);
7798
7799    // Fill bases.
7800    for (const auto &I : RD->bases()) {
7801      if (I.isVirtual())
7802        continue;
7803      const auto *Base = I.getType()->getAsCXXRecordDecl();
7804      // Ignore empty bases.
7805      if (Base->isEmpty() || CGF.getContext()
7806                                 .getASTRecordLayout(Base)
7807                                 .getNonVirtualSize()
7808                                 .isZero())
7809        continue;
7810
7811      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7812      RecordLayout[FieldIndex] = Base;
7813    }
7814    // Fill in virtual bases.
7815    for (const auto &I : RD->vbases()) {
7816      const auto *Base = I.getType()->getAsCXXRecordDecl();
7817      // Ignore empty bases.
7818      if (Base->isEmpty())
7819        continue;
7820      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7821      if (RecordLayout[FieldIndex])
7822        continue;
7823      RecordLayout[FieldIndex] = Base;
7824    }
7825    // Fill in all the fields.
7826    assert(!RD->isUnion() && "Unexpected union.");
7827    for (const auto *Field : RD->fields()) {
7828      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7829      // will fill in later.)
7830      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7831        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7832        RecordLayout[FieldIndex] = Field;
7833      }
7834    }
7835    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7836             &Data : RecordLayout) {
7837      if (Data.isNull())
7838        continue;
7839      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7840        getPlainLayout(Base, Layout, /*AsBase=*/true);
7841      else
7842        Layout.push_back(Data.get<const FieldDecl *>());
7843    }
7844  }
7845
public:
  /// Constructor for executable directives. Pre-computes the firstprivate
  /// declarations and is_device_ptr component lists that the mapping code
  /// consults later.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information. The mapped value records
    // whether the clause was implicit.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information, keyed by the associated
    // declaration.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }
7859
  /// Constructor for the declare mapper directive. No clause pre-processing is
  /// needed here; the map clauses are read directly in
  /// generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7863
7864  /// Generate code for the combined entry if we have a partially mapped struct
7865  /// and take care of the mapping flags of the arguments corresponding to
7866  /// individual struct members.
7867  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7868                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7869                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7870                         const StructRangeInfoTy &PartialStruct) const {
7871    // Base is the base of the struct
7872    BasePointers.push_back(PartialStruct.Base.getPointer());
7873    // Pointer is the address of the lowest element
7874    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7875    Pointers.push_back(LB);
7876    // Size is (addr of {highest+1} element) - (addr of lowest element)
7877    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7878    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7879    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7880    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7881    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7882    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7883                                                  /*isSigned=*/false);
7884    Sizes.push_back(Size);
7885    // Map type is always TARGET_PARAM
7886    Types.push_back(OMP_MAP_TARGET_PARAM);
7887    // Remove TARGET_PARAM flag from the first element
7888    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7889
7890    // All other current entries will be MEMBER_OF the combined entry
7891    // (except for PTR_AND_OBJ entries which do not have a placeholder value
7892    // 0xFFFF in the MEMBER_OF field).
7893    OpenMPOffloadMappingFlags MemberOfFlag =
7894        getMemberOfFlag(BasePointers.size() - 1);
7895    for (auto &M : CurTypes)
7896      setCorrectMemberOfFlag(M, MemberOfFlag);
7897  }
7898
  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  /// Results are appended to the output arrays.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Entries are keyed by the canonical declaration; nullptr stands
    // for component lists rooted at 'this'.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Gather the component lists of all map, to and from clauses of the
    // directive.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
            /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit a zero-size RETURN_PARAM entry for the
          // pointer right away.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays; the results for this declaration are
      // appended to the output arrays after the combined struct entry (if
      // any) has been emitted.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }
8077
8078  /// Generate all the base pointers, section pointers, sizes and map types for
8079  /// the extracted map clauses of user-defined mapper.
8080  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8081                                MapValuesArrayTy &Pointers,
8082                                MapValuesArrayTy &Sizes,
8083                                MapFlagsArrayTy &Types) const {
8084    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8085           "Expect a declare mapper directive");
8086    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8087    // We have to process the component lists that relate with the same
8088    // declaration in a single chunk so that we can generate the map flags
8089    // correctly. Therefore, we organize all lists in a map.
8090    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8091
8092    // Helper function to fill the information map for the different supported
8093    // clauses.
8094    auto &&InfoGen = [&Info](
8095        const ValueDecl *D,
8096        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8097        OpenMPMapClauseKind MapType,
8098        ArrayRef<OpenMPMapModifierKind> MapModifiers,
8099        bool ReturnDevicePointer, bool IsImplicit) {
8100      const ValueDecl *VD =
8101          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8102      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8103                            IsImplicit);
8104    };
8105
8106    for (const auto *C : CurMapperDir->clauselists()) {
8107      const auto *MC = cast<OMPMapClause>(C);
8108      for (const auto L : MC->component_lists()) {
8109        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8110                /*ReturnDevicePointer=*/false, MC->isImplicit());
8111      }
8112    }
8113
8114    for (const auto &M : Info) {
8115      // We need to know when we generate information for the first component
8116      // associated with a capture, because the mapping flags depend on it.
8117      bool IsFirstComponentList = true;
8118
8119      // Temporary versions of arrays
8120      MapBaseValuesArrayTy CurBasePointers;
8121      MapValuesArrayTy CurPointers;
8122      MapValuesArrayTy CurSizes;
8123      MapFlagsArrayTy CurTypes;
8124      StructRangeInfoTy PartialStruct;
8125
8126      for (const MapInfo &L : M.second) {
8127        assert(!L.Components.empty() &&
8128               "Not expecting declaration with no component lists.");
8129        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8130                                     CurBasePointers, CurPointers, CurSizes,
8131                                     CurTypes, PartialStruct,
8132                                     IsFirstComponentList, L.IsImplicit);
8133        IsFirstComponentList = false;
8134      }
8135
8136      // If there is an entry in PartialStruct it means we have a struct with
8137      // individual members mapped. Emit an extra combined entry.
8138      if (PartialStruct.Base.isValid())
8139        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8140                          PartialStruct);
8141
8142      // We need to append the results of this capture to what we already have.
8143      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8144      Pointers.append(CurPointers.begin(), CurPointers.end());
8145      Sizes.append(CurSizes.begin(), CurSizes.end());
8146      Types.append(CurTypes.begin(), CurTypes.end());
8147    }
8148  }
8149
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is a lambda object, emits one PTR_AND_OBJ map entry for the
  /// captured 'this' (if any) and one for every variable captured by
  /// reference (or captured pointer). \a LambdaPointers records, for each
  /// emitted capture base pointer, the address of its enclosing lambda so
  /// that adjustMemberOfForLambdaCaptures() can later fix up the MEMBER_OF
  /// index of these entries.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda records are of interest here.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this': base is the field inside the lambda
      // object, pointee is the field's lvalue, size is that of a pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // This exact flag combination is what
      // adjustMemberOfForLambdaCaptures() looks for when patching indices.
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need an entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with the size of
        // the non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer (by copy): map the loaded pointer value itself
        // with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8213
8214  /// Set correct indices for lambdas captures.
8215  void adjustMemberOfForLambdaCaptures(
8216      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8217      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8218      MapFlagsArrayTy &Types) const {
8219    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8220      // Set correct member_of idx for all implicit lambda captures.
8221      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8222                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8223        continue;
8224      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8225      assert(BasePtr && "Unable to find base lambda address.");
8226      int TgtIdx = -1;
8227      for (unsigned J = I; J > 0; --J) {
8228        unsigned Idx = J - 1;
8229        if (Pointers[Idx] != BasePtr)
8230          continue;
8231        TgtIdx = Idx;
8232        break;
8233      }
8234      assert(TgtIdx != -1 && "Unable to find parent lambda.");
8235      // All other current entries will be MEMBER_OF the combined entry
8236      // (except for PTR_AND_OBJ entries which do not have a placeholder value
8237      // 0xFFFF in the MEMBER_OF field).
8238      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8239      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8240    }
8241  }
8242
  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  ///
  /// Collects every map-clause component list of the current executable
  /// directive that refers to the captured declaration, detects overlaps
  /// between those lists, and emits the map information (lists that have
  /// overlapped elements first). \a PartialStruct is filled if individual
  /// members of a struct were mapped, so the caller can emit an extra
  /// combined entry.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // Canonical declaration of the captured variable; null for 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather all component lists of map clauses that mention VD, together
    // with each clause's map type, modifiers and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Each pair of lists is compared component-by-component starting from
    // the base (reverse iteration); if one list is a prefix of the other,
    // the shorter list is the base and the longer one overlaps it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Only compare against lists after L to visit each pair once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item so members are emitted in
    // record-layout order; Layout only matters when the fields being
    // compared live in different (base) classes.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the first differing field.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit lists that have overlapped elements first; the mapping flags of
    // the first list generated for the capture differ from the rest.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8420
8421  /// Generate the base pointers, section pointers, sizes and map types
8422  /// associated with the declare target link variables.
8423  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8424                                        MapValuesArrayTy &Pointers,
8425                                        MapValuesArrayTy &Sizes,
8426                                        MapFlagsArrayTy &Types) const {
8427    assert(CurDir.is<const OMPExecutableDirective *>() &&
8428           "Expect a executable directive");
8429    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8430    // Map other list items in the map clause which are not captured variables
8431    // but "declare target link" global variables.
8432    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8433      for (const auto L : C->component_lists()) {
8434        if (!L.first)
8435          continue;
8436        const auto *VD = dyn_cast<VarDecl>(L.first);
8437        if (!VD)
8438          continue;
8439        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8440            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8441        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8442            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8443          continue;
8444        StructRangeInfoTy PartialStruct;
8445        generateInfoForComponentList(
8446            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8447            Pointers, Sizes, Types, PartialStruct,
8448            /*IsFirstComponentList=*/true, C->isImplicit());
8449        assert(!PartialStruct.Base.isValid() &&
8450               "No partial structs for declare target link expected.");
8451      }
8452    }
8453  }
8454
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Used for captures that appear in no map clause: picks the map type,
  /// size and base/section pointers according to how the variable is
  /// captured ('this', by copy, or by reference) and whether it appears in
  /// a firstprivate clause.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // A FirstPrivateDecls entry may override this below.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured 'this': map the pointee with the size of the pointee type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause may change the implicitness of the map.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: register a global copy and map
        // that copy instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: use the pointer value loaded through the
          // captured reference as the section pointer.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8538};
8539} // anonymous namespace
8540
8541/// Emit the arrays used to pass the captures and map information to the
8542/// offloading runtime library. If there is no map or capture information,
8543/// return nullptr by reference.
8544static void
8545emitOffloadingArrays(CodeGenFunction &CGF,
8546                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8547                     MappableExprsHandler::MapValuesArrayTy &Pointers,
8548                     MappableExprsHandler::MapValuesArrayTy &Sizes,
8549                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8550                     CGOpenMPRuntime::TargetDataInfo &Info) {
8551  CodeGenModule &CGM = CGF.CGM;
8552  ASTContext &Ctx = CGF.getContext();
8553
8554  // Reset the array information.
8555  Info.clearArrayInfo();
8556  Info.NumberOfPtrs = BasePointers.size();
8557
8558  if (Info.NumberOfPtrs) {
8559    // Detect if we have any capture size requiring runtime evaluation of the
8560    // size so that a constant array could be eventually used.
8561    bool hasRuntimeEvaluationCaptureSize = false;
8562    for (llvm::Value *S : Sizes)
8563      if (!isa<llvm::Constant>(S)) {
8564        hasRuntimeEvaluationCaptureSize = true;
8565        break;
8566      }
8567
8568    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8569    QualType PointerArrayType = Ctx.getConstantArrayType(
8570        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8571        /*IndexTypeQuals=*/0);
8572
8573    Info.BasePointersArray =
8574        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8575    Info.PointersArray =
8576        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8577
8578    // If we don't have any VLA types or other types that require runtime
8579    // evaluation, we can use a constant array for the map sizes, otherwise we
8580    // need to fill up the arrays as we do for the pointers.
8581    QualType Int64Ty =
8582        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8583    if (hasRuntimeEvaluationCaptureSize) {
8584      QualType SizeArrayType = Ctx.getConstantArrayType(
8585          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8586          /*IndexTypeQuals=*/0);
8587      Info.SizesArray =
8588          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8589    } else {
8590      // We expect all the sizes to be constant, so we collect them to create
8591      // a constant array.
8592      SmallVector<llvm::Constant *, 16> ConstSizes;
8593      for (llvm::Value *S : Sizes)
8594        ConstSizes.push_back(cast<llvm::Constant>(S));
8595
8596      auto *SizesArrayInit = llvm::ConstantArray::get(
8597          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8598      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8599      auto *SizesArrayGbl = new llvm::GlobalVariable(
8600          CGM.getModule(), SizesArrayInit->getType(),
8601          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8602          SizesArrayInit, Name);
8603      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8604      Info.SizesArray = SizesArrayGbl;
8605    }
8606
8607    // The map types are always constant so we don't need to generate code to
8608    // fill arrays. Instead, we create an array constant.
8609    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8610    llvm::copy(MapTypes, Mapping.begin());
8611    llvm::Constant *MapTypesArrayInit =
8612        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8613    std::string MaptypesName =
8614        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8615    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8616        CGM.getModule(), MapTypesArrayInit->getType(),
8617        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8618        MapTypesArrayInit, MaptypesName);
8619    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8620    Info.MapTypesArray = MapTypesArrayGbl;
8621
8622    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8623      llvm::Value *BPVal = *BasePointers[I];
8624      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8625          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8626          Info.BasePointersArray, 0, I);
8627      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8628          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8629      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8630      CGF.Builder.CreateStore(BPVal, BPAddr);
8631
8632      if (Info.requiresDevicePointerInfo())
8633        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8634          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8635
8636      llvm::Value *PVal = Pointers[I];
8637      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8638          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8639          Info.PointersArray, 0, I);
8640      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8641          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8642      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8643      CGF.Builder.CreateStore(PVal, PAddr);
8644
8645      if (hasRuntimeEvaluationCaptureSize) {
8646        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8647            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8648            Info.SizesArray,
8649            /*Idx0=*/0,
8650            /*Idx1=*/I);
8651        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8652        CGF.Builder.CreateStore(
8653            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8654            SAddr);
8655      }
8656    }
8657  }
8658}
8659
8660/// Emit the arguments to be passed to the runtime library based on the
8661/// arrays of pointers, sizes and map types.
8662static void emitOffloadingArraysArgument(
8663    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8664    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8665    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8666  CodeGenModule &CGM = CGF.CGM;
8667  if (Info.NumberOfPtrs) {
8668    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8669        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8670        Info.BasePointersArray,
8671        /*Idx0=*/0, /*Idx1=*/0);
8672    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8673        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8674        Info.PointersArray,
8675        /*Idx0=*/0,
8676        /*Idx1=*/0);
8677    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8679        /*Idx0=*/0, /*Idx1=*/0);
8680    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8681        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8682        Info.MapTypesArray,
8683        /*Idx0=*/0,
8684        /*Idx1=*/0);
8685  } else {
8686    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8687    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8688    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8689    MapTypesArrayArg =
8690        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8691  }
8692}
8693
/// Check for inner distribute directive.
///
/// Returns the distribute directive nested inside \a D — looking through
/// an inner 'teams' region in the plain 'target' case — or nullptr if no
/// such nested directive exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // The single statement, if any, inside the captured region's body.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain a distribute directly, or a 'teams' whose own
      // single child is a distribute.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams': the distribute, if any, is the direct child.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute region.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      // This helper is only called for the target kinds handled above; any
      // other directive kind indicates a caller bug.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8795
8796/// Emit the user-defined mapper function. The code generation follows the
8797/// pattern in the example below.
8798/// \code
8799/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8800///                                           void *base, void *begin,
8801///                                           int64_t size, int64_t type) {
8802///   // Allocate space for an array section first.
8803///   if (size > 1 && !maptype.IsDelete)
8804///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8805///                                 size*sizeof(Ty), clearToFrom(type));
8806///   // Map members.
8807///   for (unsigned i = 0; i < size; i++) {
8808///     // For each component specified by this mapper:
8809///     for (auto c : all_components) {
8810///       if (c.hasMapper())
8811///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8812///                       c.arg_type);
8813///       else
8814///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8815///                                     c.arg_begin, c.arg_size, c.arg_type);
8816///     }
8817///   }
8818///   // Delete the array section.
8819///   if (size > 1 && maptype.IsDelete)
8820///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8821///                                 size*sizeof(Ty), clearToFrom(type));
8822/// }
8823/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each user-defined mapper only once; UDMMap caches the mappers that
  // have already been generated for this module.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Pointer-to-element type used to walk the array section; restrict is safe
  // because the mapper introduces no aliasing pointers of its own.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'omp declare mapper' construct. It is
  // privatized below so each loop iteration maps the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (rt_mapper_handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper name>."; internal
  // linkage since the mapper is only referenced from this translation unit.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized even at -O0.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  // NOTE(review): the loads below pass pointer QualTypes (e.g.
  // C.getPointerType(Int64Ty)) as the loaded value's type — presumably only
  // size/alignment of the LValue matter here; confirm against EmitLoadOfScalar.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  // One-past-the-end pointer: begin + size elements.
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required. The helper falls through to HeadBB.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied: skip the loop
  // entirely when the section is empty (begin == end).
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the current element; its
  // back-edge incoming value (PtrNext) is added after the body is emitted.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // to each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // A zero MEMBER_OF field means this component is not a member of any
    // previous component; skip the combine step in that case.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing. The value reaching EndBB from ToElseBB is
    // the unmodified MemberMapType (fourth PHI incoming below).
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: the back edge feeds PtrNext into the header PHI.
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function, and remember which CodeGenFunction (if any)
  // triggered the emission so per-function bookkeeping can be maintained.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9073
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9080void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9081    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9082    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9083    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9084  StringRef Prefix = IsInit ? ".init" : ".del";
9085
9086  // Evaluate if this is an array section.
9087  llvm::BasicBlock *IsDeleteBB =
9088      MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9089  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9090  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9091      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9092  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9093
9094  // Evaluate if we are going to delete this section.
9095  MapperCGF.EmitBlock(IsDeleteBB);
9096  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9097      MapType,
9098      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9099  llvm::Value *DeleteCond;
9100  if (IsInit) {
9101    DeleteCond = MapperCGF.Builder.CreateIsNull(
9102        DeleteBit, "omp.array" + Prefix + ".delete");
9103  } else {
9104    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9105        DeleteBit, "omp.array" + Prefix + ".delete");
9106  }
9107  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9108
9109  MapperCGF.EmitBlock(BodyBB);
9110  // Get the array size by multiplying element size and element number (i.e., \p
9111  // Size).
9112  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9113      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9114  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9115  // memory allocation/deletion purpose only.
9116  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9117      MapType,
9118      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9119                                   MappableExprsHandler::OMP_MAP_FROM)));
9120  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9121  // data structure.
9122  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9123  MapperCGF.EmitRuntimeCall(
9124      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9125}
9126
9127void CGOpenMPRuntime::emitTargetNumIterationsCall(
9128    CodeGenFunction &CGF, const OMPExecutableDirective &D,
9129    llvm::Value *DeviceID,
9130    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9131                                     const OMPLoopDirective &D)>
9132        SizeEmitter) {
9133  OpenMPDirectiveKind Kind = D.getDirectiveKind();
9134  const OMPExecutableDirective *TD = &D;
9135  // Get nested teams distribute kind directive, if any.
9136  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9137    TD = getNestedDistributeDirective(CGM.getContext(), D);
9138  if (!TD)
9139    return;
9140  const auto *LD = cast<OMPLoopDirective>(TD);
9141  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9142                                                     PrePostActionTy &) {
9143    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9144      llvm::Value *Args[] = {DeviceID, NumIterations};
9145      CGF.EmitRuntimeCall(
9146          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9147    }
9148  };
9149  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9150}
9151
9152void CGOpenMPRuntime::emitTargetCall(
9153    CodeGenFunction &CGF, const OMPExecutableDirective &D,
9154    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9155    const Expr *Device,
9156    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9157                                     const OMPLoopDirective &D)>
9158        SizeEmitter) {
9159  if (!CGF.HaveInsertPoint())
9160    return;
9161
9162  assert(OutlinedFn && "Invalid outlined function!");
9163
9164  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9165  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9166  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9167  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9168                                            PrePostActionTy &) {
9169    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9170  };
9171  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9172
9173  CodeGenFunction::OMPTargetDataInfo InputInfo;
9174  llvm::Value *MapTypesArray = nullptr;
9175  // Fill up the pointer arrays and transfer execution to the device.
9176  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9177                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9178                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9179    // On top of the arrays that were filled up, the target offloading call
9180    // takes as arguments the device id as well as the host pointer. The host
9181    // pointer is used by the runtime library to identify the current target
9182    // region, so it only has to be unique and not necessarily point to
9183    // anything. It could be the pointer to the outlined function that
9184    // implements the target region, but we aren't using that so that the
9185    // compiler doesn't need to keep that, and could therefore inline the host
9186    // function if proven worthwhile during optimization.
9187
9188    // From this point on, we need to have an ID of the target region defined.
9189    assert(OutlinedFnID && "Invalid outlined function ID!");
9190
9191    // Emit device ID if any.
9192    llvm::Value *DeviceID;
9193    if (Device) {
9194      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195                                           CGF.Int64Ty, /*isSigned=*/true);
9196    } else {
9197      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9198    }
9199
9200    // Emit the number of elements in the offloading arrays.
9201    llvm::Value *PointerNum =
9202        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9203
9204    // Return value of the runtime offloading call.
9205    llvm::Value *Return;
9206
9207    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9208    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9209
9210    // Emit tripcount for the target loop-based directive.
9211    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9212
9213    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9214    // The target region is an outlined function launched by the runtime
9215    // via calls __tgt_target() or __tgt_target_teams().
9216    //
9217    // __tgt_target() launches a target region with one team and one thread,
9218    // executing a serial region.  This master thread may in turn launch
9219    // more threads within its team upon encountering a parallel region,
9220    // however, no additional teams can be launched on the device.
9221    //
9222    // __tgt_target_teams() launches a target region with one or more teams,
9223    // each with one or more threads.  This call is required for target
9224    // constructs such as:
9225    //  'target teams'
9226    //  'target' / 'teams'
9227    //  'target teams distribute parallel for'
9228    //  'target parallel'
9229    // and so on.
9230    //
9231    // Note that on the host and CPU targets, the runtime implementation of
9232    // these calls simply call the outlined function without forking threads.
9233    // The outlined functions themselves have runtime calls to
9234    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9235    // the compiler in emitTeamsCall() and emitParallelCall().
9236    //
9237    // In contrast, on the NVPTX target, the implementation of
9238    // __tgt_target_teams() launches a GPU kernel with the requested number
9239    // of teams and threads so no additional calls to the runtime are required.
9240    if (NumTeams) {
9241      // If we have NumTeams defined this means that we have an enclosed teams
9242      // region. Therefore we also expect to have NumThreads defined. These two
9243      // values should be defined in the presence of a teams directive,
9244      // regardless of having any clauses associated. If the user is using teams
9245      // but no clauses, these two values will be the default that should be
9246      // passed to the runtime library - a 32-bit integer with the value zero.
9247      assert(NumThreads && "Thread limit expression should be available along "
9248                           "with number of teams.");
9249      llvm::Value *OffloadingArgs[] = {DeviceID,
9250                                       OutlinedFnID,
9251                                       PointerNum,
9252                                       InputInfo.BasePointersArray.getPointer(),
9253                                       InputInfo.PointersArray.getPointer(),
9254                                       InputInfo.SizesArray.getPointer(),
9255                                       MapTypesArray,
9256                                       NumTeams,
9257                                       NumThreads};
9258      Return = CGF.EmitRuntimeCall(
9259          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9260                                          : OMPRTL__tgt_target_teams),
9261          OffloadingArgs);
9262    } else {
9263      llvm::Value *OffloadingArgs[] = {DeviceID,
9264                                       OutlinedFnID,
9265                                       PointerNum,
9266                                       InputInfo.BasePointersArray.getPointer(),
9267                                       InputInfo.PointersArray.getPointer(),
9268                                       InputInfo.SizesArray.getPointer(),
9269                                       MapTypesArray};
9270      Return = CGF.EmitRuntimeCall(
9271          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9272                                          : OMPRTL__tgt_target),
9273          OffloadingArgs);
9274    }
9275
9276    // Check the error code and execute the host version if required.
9277    llvm::BasicBlock *OffloadFailedBlock =
9278        CGF.createBasicBlock("omp_offload.failed");
9279    llvm::BasicBlock *OffloadContBlock =
9280        CGF.createBasicBlock("omp_offload.cont");
9281    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9282    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9283
9284    CGF.EmitBlock(OffloadFailedBlock);
9285    if (RequiresOuterTask) {
9286      CapturedVars.clear();
9287      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9288    }
9289    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9290    CGF.EmitBranch(OffloadContBlock);
9291
9292    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9293  };
9294
9295  // Notify that the host version must be executed.
9296  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9297                    RequiresOuterTask](CodeGenFunction &CGF,
9298                                       PrePostActionTy &) {
9299    if (RequiresOuterTask) {
9300      CapturedVars.clear();
9301      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9302    }
9303    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9304  };
9305
9306  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9307                          &CapturedVars, RequiresOuterTask,
9308                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9309    // Fill up the arrays with all the captured variables.
9310    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9311    MappableExprsHandler::MapValuesArrayTy Pointers;
9312    MappableExprsHandler::MapValuesArrayTy Sizes;
9313    MappableExprsHandler::MapFlagsArrayTy MapTypes;
9314
9315    // Get mappable expression information.
9316    MappableExprsHandler MEHandler(D, CGF);
9317    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9318
9319    auto RI = CS.getCapturedRecordDecl()->field_begin();
9320    auto CV = CapturedVars.begin();
9321    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9322                                              CE = CS.capture_end();
9323         CI != CE; ++CI, ++RI, ++CV) {
9324      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9325      MappableExprsHandler::MapValuesArrayTy CurPointers;
9326      MappableExprsHandler::MapValuesArrayTy CurSizes;
9327      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9328      MappableExprsHandler::StructRangeInfoTy PartialStruct;
9329
9330      // VLA sizes are passed to the outlined region by copy and do not have map
9331      // information associated.
9332      if (CI->capturesVariableArrayType()) {
9333        CurBasePointers.push_back(*CV);
9334        CurPointers.push_back(*CV);
9335        CurSizes.push_back(CGF.Builder.CreateIntCast(
9336            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9337        // Copy to the device as an argument. No need to retrieve it.
9338        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9339                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9340                              MappableExprsHandler::OMP_MAP_IMPLICIT);
9341      } else {
9342        // If we have any information in the map clause, we use it, otherwise we
9343        // just do a default mapping.
9344        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9345                                         CurSizes, CurMapTypes, PartialStruct);
9346        if (CurBasePointers.empty())
9347          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9348                                           CurPointers, CurSizes, CurMapTypes);
9349        // Generate correct mapping for variables captured by reference in
9350        // lambdas.
9351        if (CI->capturesVariable())
9352          MEHandler.generateInfoForLambdaCaptures(
9353              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9354              CurMapTypes, LambdaPointers);
9355      }
9356      // We expect to have at least an element of information for this capture.
9357      assert(!CurBasePointers.empty() &&
9358             "Non-existing map pointer for capture!");
9359      assert(CurBasePointers.size() == CurPointers.size() &&
9360             CurBasePointers.size() == CurSizes.size() &&
9361             CurBasePointers.size() == CurMapTypes.size() &&
9362             "Inconsistent map information sizes!");
9363
9364      // If there is an entry in PartialStruct it means we have a struct with
9365      // individual members mapped. Emit an extra combined entry.
9366      if (PartialStruct.Base.isValid())
9367        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9368                                    CurMapTypes, PartialStruct);
9369
9370      // We need to append the results of this capture to what we already have.
9371      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9372      Pointers.append(CurPointers.begin(), CurPointers.end());
9373      Sizes.append(CurSizes.begin(), CurSizes.end());
9374      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9375    }
9376    // Adjust MEMBER_OF flags for the lambdas captures.
9377    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9378                                              Pointers, MapTypes);
9379    // Map other list items in the map clause which are not captured variables
9380    // but "declare target link" global variables.
9381    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9382                                               MapTypes);
9383
9384    TargetDataInfo Info;
9385    // Fill up the arrays and create the arguments.
9386    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9387    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9388                                 Info.PointersArray, Info.SizesArray,
9389                                 Info.MapTypesArray, Info);
9390    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9391    InputInfo.BasePointersArray =
9392        Address(Info.BasePointersArray, CGM.getPointerAlign());
9393    InputInfo.PointersArray =
9394        Address(Info.PointersArray, CGM.getPointerAlign());
9395    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9396    MapTypesArray = Info.MapTypesArray;
9397    if (RequiresOuterTask)
9398      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9399    else
9400      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9401  };
9402
9403  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9404                             CodeGenFunction &CGF, PrePostActionTy &) {
9405    if (RequiresOuterTask) {
9406      CodeGenFunction::OMPTargetDataInfo InputInfo;
9407      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9408    } else {
9409      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9410    }
9411  };
9412
9413  // If we have a target function ID it means that we need to support
9414  // offloading, otherwise, just execute on the host. We need to execute on host
9415  // regardless of the conditional in the if clause if, e.g., the user do not
9416  // specify target triples.
9417  if (OutlinedFnID) {
9418    if (IfCond) {
9419      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9420    } else {
9421      RegionCodeGenTy ThenRCG(TargetThenGen);
9422      ThenRCG(CGF);
9423    }
9424  } else {
9425    RegionCodeGenTy ElseRCG(TargetElseGen);
9426    ElseRCG(CGF);
9427  }
9428}
9429
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  // Recursively walk the statement tree under \p S and emit a device function
  // for every target-execution directive that is registered as an offload
  // entry point. \p ParentName is the mangled name of the host function (or
  // ctor/dtor) that lexically contains the region; it is part of the unique
  // offload-entry key.
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (device-id, file-id, parent-name, line) tuple uniquely identifies
    // this target region across translation units.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter. Every
    // target-execution directive kind must have its own case here; all other
    // directive kinds are unreachable because of the
    // isOpenMPTargetExecutionDirective() check above.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: recurse only into the captured body;
  // a directive without an associated statement has nothing to scan.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9573
9574bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9575  // If emitting code for the host, we do not process FD here. Instead we do
9576  // the normal code generation.
9577  if (!CGM.getLangOpts().OpenMPIsDevice) {
9578    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9579      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9580          OMPDeclareTargetDeclAttr::getDeviceType(FD);
9581      // Do not emit device_type(nohost) functions for the host.
9582      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9583        return true;
9584    }
9585    return false;
9586  }
9587
9588  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9589  // Try to detect target regions in the function.
9590  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9591    StringRef Name = CGM.getMangledName(GD);
9592    scanForTargetRegionsFunctions(FD->getBody(), Name);
9593    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9594        OMPDeclareTargetDeclAttr::getDeviceType(FD);
9595    // Do not emit device_type(nohost) functions for the host.
9596    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9597      return true;
9598  }
9599
9600  // Do not to emit function if it is not marked as declare target.
9601  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9602         AlreadyEmittedTargetDecls.count(VD) == 0;
9603}
9604
9605bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9606  if (!CGM.getLangOpts().OpenMPIsDevice)
9607    return false;
9608
9609  // Check if there are Ctors/Dtors in this declaration and look for target
9610  // regions in it. We use the complete variant to produce the kernel name
9611  // mangling.
9612  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9613  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9614    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9615      StringRef ParentName =
9616          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9617      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9618    }
9619    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9620      StringRef ParentName =
9621          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9622      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9623    }
9624  }
9625
9626  // Do not to emit variable if it is not marked as declare target.
9627  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9628      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9629          cast<VarDecl>(GD.getDecl()));
9630  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9631      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9632       HasRequiresUnifiedSharedMemory)) {
9633    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9634    return true;
9635  }
9636  return false;
9637}
9638
9639llvm::Constant *
9640CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9641                                                const VarDecl *VD) {
9642  assert(VD->getType().isConstant(CGM.getContext()) &&
9643         "Expected constant variable.");
9644  StringRef VarName;
9645  llvm::Constant *Addr;
9646  llvm::GlobalValue::LinkageTypes Linkage;
9647  QualType Ty = VD->getType();
9648  SmallString<128> Buffer;
9649  {
9650    unsigned DeviceID;
9651    unsigned FileID;
9652    unsigned Line;
9653    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9654                             FileID, Line);
9655    llvm::raw_svector_ostream OS(Buffer);
9656    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9657       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9658    VarName = OS.str();
9659  }
9660  Linkage = llvm::GlobalValue::InternalLinkage;
9661  Addr =
9662      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9663                                  getDefaultFirstprivateAddressSpace());
9664  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9665  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9666  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9667  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9668      VarName, Addr, VarSize,
9669      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9670  return Addr;
9671}
9672
9673void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9674                                                   llvm::Constant *Addr) {
9675  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9676      !CGM.getLangOpts().OpenMPIsDevice)
9677    return;
9678  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9679      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9680  if (!Res) {
9681    if (CGM.getLangOpts().OpenMPIsDevice) {
9682      // Register non-target variables being emitted in device code (debug info
9683      // may cause this).
9684      StringRef VarName = CGM.getMangledName(VD);
9685      EmittedNonTargetVariables.try_emplace(VarName, Addr);
9686    }
9687    return;
9688  }
9689  // Register declare target variables.
9690  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9691  StringRef VarName;
9692  CharUnits VarSize;
9693  llvm::GlobalValue::LinkageTypes Linkage;
9694
9695  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9696      !HasRequiresUnifiedSharedMemory) {
9697    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9698    VarName = CGM.getMangledName(VD);
9699    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9700      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9701      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9702    } else {
9703      VarSize = CharUnits::Zero();
9704    }
9705    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9706    // Temp solution to prevent optimizations of the internal variables.
9707    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9708      std::string RefName = getName({VarName, "ref"});
9709      if (!CGM.GetGlobalValue(RefName)) {
9710        llvm::Constant *AddrRef =
9711            getOrCreateInternalVariable(Addr->getType(), RefName);
9712        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9713        GVAddrRef->setConstant(/*Val=*/true);
9714        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9715        GVAddrRef->setInitializer(Addr);
9716        CGM.addCompilerUsedGlobal(GVAddrRef);
9717      }
9718    }
9719  } else {
9720    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9721            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9722             HasRequiresUnifiedSharedMemory)) &&
9723           "Declare target attribute must link or to with unified memory.");
9724    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9725      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9726    else
9727      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9728
9729    if (CGM.getLangOpts().OpenMPIsDevice) {
9730      VarName = Addr->getName();
9731      Addr = nullptr;
9732    } else {
9733      VarName = getAddrOfDeclareTargetVar(VD).getName();
9734      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9735    }
9736    VarSize = CGM.getPointerSize();
9737    Linkage = llvm::GlobalValue::WeakAnyLinkage;
9738  }
9739
9740  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9741      VarName, Addr, VarSize, Flags, Linkage);
9742}
9743
9744bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9745  if (isa<FunctionDecl>(GD.getDecl()) ||
9746      isa<OMPDeclareReductionDecl>(GD.getDecl()))
9747    return emitTargetFunctions(GD);
9748
9749  return emitTargetGlobalVariable(GD);
9750}
9751
9752void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9753  for (const VarDecl *VD : DeferredGlobalVariables) {
9754    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9755        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9756    if (!Res)
9757      continue;
9758    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9759        !HasRequiresUnifiedSharedMemory) {
9760      CGM.EmitGlobal(VD);
9761    } else {
9762      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9763              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9764               HasRequiresUnifiedSharedMemory)) &&
9765             "Expected link clause or to clause with unified memory.");
9766      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9767    }
9768  }
9769}
9770
9771void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9772    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9773  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9774         " Expected target-based directive.");
9775}
9776
9777void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9778    const OMPRequiresDecl *D) {
9779  for (const OMPClause *Clause : D->clauselists()) {
9780    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9781      HasRequiresUnifiedSharedMemory = true;
9782      break;
9783    }
9784  }
9785}
9786
9787bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9788                                                       LangAS &AS) {
9789  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9790    return false;
9791  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9792  switch(A->getAllocatorType()) {
9793  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9794  // Not supported, fallback to the default mem space.
9795  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9796  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9797  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9798  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9799  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9800  case OMPAllocateDeclAttr::OMPConstMemAlloc:
9801  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9802    AS = LangAS::Default;
9803    return true;
9804  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9805    llvm_unreachable("Expected predefined allocator for the variables with the "
9806                     "static storage.");
9807  }
9808  return false;
9809}
9810
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  // True iff a 'requires unified_shared_memory' clause was recorded (see
  // checkArchForUnifiedAddressing).
  return HasRequiresUnifiedSharedMemory;
}
9814
9815CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9816    CodeGenModule &CGM)
9817    : CGM(CGM) {
9818  if (CGM.getLangOpts().OpenMPIsDevice) {
9819    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9820    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9821  }
9822}
9823
9824CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9825  if (CGM.getLangOpts().OpenMPIsDevice)
9826    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9827}
9828
9829bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9830  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9831    return true;
9832
9833  const auto *D = cast<FunctionDecl>(GD.getDecl());
9834  // Do not to emit function if it is marked as declare target as it was already
9835  // emitted.
9836  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9837    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9838      if (auto *F = dyn_cast_or_null<llvm::Function>(
9839              CGM.GetGlobalValue(CGM.getMangledName(GD))))
9840        return !F->isDeclaration();
9841      return false;
9842    }
9843    return true;
9844  }
9845
9846  return !AlreadyEmittedTargetDecls.insert(D).second;
9847}
9848
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // Emit a global constructor-style function that calls
  // __tgt_register_requires with the flags derived from 'requires' clauses.
  // Returns nullptr when no registration function is needed.
  //
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // CGF is scoped so StartFunction/FinishFunction bracket exactly the body
    // of the registration function.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9889
9890void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9891                                    const OMPExecutableDirective &D,
9892                                    SourceLocation Loc,
9893                                    llvm::Function *OutlinedFn,
9894                                    ArrayRef<llvm::Value *> CapturedVars) {
9895  if (!CGF.HaveInsertPoint())
9896    return;
9897
9898  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9899  CodeGenFunction::RunCleanupsScope Scope(CGF);
9900
9901  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9902  llvm::Value *Args[] = {
9903      RTLoc,
9904      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9905      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9906  llvm::SmallVector<llvm::Value *, 16> RealArgs;
9907  RealArgs.append(std::begin(Args), std::end(Args));
9908  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9909
9910  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9911  CGF.EmitRuntimeCall(RTLFn, RealArgs);
9912}
9913
9914void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9915                                         const Expr *NumTeams,
9916                                         const Expr *ThreadLimit,
9917                                         SourceLocation Loc) {
9918  if (!CGF.HaveInsertPoint())
9919    return;
9920
9921  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9922
9923  llvm::Value *NumTeamsVal =
9924      NumTeams
9925          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9926                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
9927          : CGF.Builder.getInt32(0);
9928
9929  llvm::Value *ThreadLimitVal =
9930      ThreadLimit
9931          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9932                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
9933          : CGF.Builder.getInt32(0);
9934
9935  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9936  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9937                                     ThreadLimitVal};
9938  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9939                      PushNumTeamsArgs);
9940}
9941
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  // Emit the 'target data' open/close runtime calls around the region body:
  //   __tgt_target_data_begin(...); <body>; __tgt_target_data_end(...)
  // The open and close are each guarded by the 'if' clause when present, and
  // the body is emitted once (or twice, with/without privatization, when
  // device pointer privatization is required).
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    // Note: this populates Info, which the closing lambda below relies on.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    // Info must have been filled by BeginThenGen before this runs.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10068
10069void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10070    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10071    const Expr *Device) {
10072  if (!CGF.HaveInsertPoint())
10073    return;
10074
10075  assert((isa<OMPTargetEnterDataDirective>(D) ||
10076          isa<OMPTargetExitDataDirective>(D) ||
10077          isa<OMPTargetUpdateDirective>(D)) &&
10078         "Expecting either target enter, exit data, or update directives.");
10079
10080  CodeGenFunction::OMPTargetDataInfo InputInfo;
10081  llvm::Value *MapTypesArray = nullptr;
10082  // Generate the code for the opening of the data environment.
10083  auto &&ThenGen = [this, &D, Device, &InputInfo,
10084                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10085    // Emit device ID if any.
10086    llvm::Value *DeviceID = nullptr;
10087    if (Device) {
10088      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10089                                           CGF.Int64Ty, /*isSigned=*/true);
10090    } else {
10091      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10092    }
10093
10094    // Emit the number of elements in the offloading arrays.
10095    llvm::Constant *PointerNum =
10096        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10097
10098    llvm::Value *OffloadingArgs[] = {DeviceID,
10099                                     PointerNum,
10100                                     InputInfo.BasePointersArray.getPointer(),
10101                                     InputInfo.PointersArray.getPointer(),
10102                                     InputInfo.SizesArray.getPointer(),
10103                                     MapTypesArray};
10104
10105    // Select the right runtime function call for each expected standalone
10106    // directive.
10107    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10108    OpenMPRTLFunction RTLFn;
10109    switch (D.getDirectiveKind()) {
10110    case OMPD_target_enter_data:
10111      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10112                        : OMPRTL__tgt_target_data_begin;
10113      break;
10114    case OMPD_target_exit_data:
10115      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10116                        : OMPRTL__tgt_target_data_end;
10117      break;
10118    case OMPD_target_update:
10119      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10120                        : OMPRTL__tgt_target_data_update;
10121      break;
10122    case OMPD_parallel:
10123    case OMPD_for:
10124    case OMPD_parallel_for:
10125    case OMPD_parallel_master:
10126    case OMPD_parallel_sections:
10127    case OMPD_for_simd:
10128    case OMPD_parallel_for_simd:
10129    case OMPD_cancel:
10130    case OMPD_cancellation_point:
10131    case OMPD_ordered:
10132    case OMPD_threadprivate:
10133    case OMPD_allocate:
10134    case OMPD_task:
10135    case OMPD_simd:
10136    case OMPD_sections:
10137    case OMPD_section:
10138    case OMPD_single:
10139    case OMPD_master:
10140    case OMPD_critical:
10141    case OMPD_taskyield:
10142    case OMPD_barrier:
10143    case OMPD_taskwait:
10144    case OMPD_taskgroup:
10145    case OMPD_atomic:
10146    case OMPD_flush:
10147    case OMPD_teams:
10148    case OMPD_target_data:
10149    case OMPD_distribute:
10150    case OMPD_distribute_simd:
10151    case OMPD_distribute_parallel_for:
10152    case OMPD_distribute_parallel_for_simd:
10153    case OMPD_teams_distribute:
10154    case OMPD_teams_distribute_simd:
10155    case OMPD_teams_distribute_parallel_for:
10156    case OMPD_teams_distribute_parallel_for_simd:
10157    case OMPD_declare_simd:
10158    case OMPD_declare_variant:
10159    case OMPD_declare_target:
10160    case OMPD_end_declare_target:
10161    case OMPD_declare_reduction:
10162    case OMPD_declare_mapper:
10163    case OMPD_taskloop:
10164    case OMPD_taskloop_simd:
10165    case OMPD_master_taskloop:
10166    case OMPD_master_taskloop_simd:
10167    case OMPD_parallel_master_taskloop:
10168    case OMPD_parallel_master_taskloop_simd:
10169    case OMPD_target:
10170    case OMPD_target_simd:
10171    case OMPD_target_teams_distribute:
10172    case OMPD_target_teams_distribute_simd:
10173    case OMPD_target_teams_distribute_parallel_for:
10174    case OMPD_target_teams_distribute_parallel_for_simd:
10175    case OMPD_target_teams:
10176    case OMPD_target_parallel:
10177    case OMPD_target_parallel_for:
10178    case OMPD_target_parallel_for_simd:
10179    case OMPD_requires:
10180    case OMPD_unknown:
10181      llvm_unreachable("Unexpected standalone target data directive.");
10182      break;
10183    }
10184    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10185  };
10186
10187  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10188                             CodeGenFunction &CGF, PrePostActionTy &) {
10189    // Fill up the arrays with all the mapped variables.
10190    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10191    MappableExprsHandler::MapValuesArrayTy Pointers;
10192    MappableExprsHandler::MapValuesArrayTy Sizes;
10193    MappableExprsHandler::MapFlagsArrayTy MapTypes;
10194
10195    // Get map clause information.
10196    MappableExprsHandler MEHandler(D, CGF);
10197    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10198
10199    TargetDataInfo Info;
10200    // Fill up the arrays and create the arguments.
10201    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10202    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10203                                 Info.PointersArray, Info.SizesArray,
10204                                 Info.MapTypesArray, Info);
10205    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10206    InputInfo.BasePointersArray =
10207        Address(Info.BasePointersArray, CGM.getPointerAlign());
10208    InputInfo.PointersArray =
10209        Address(Info.PointersArray, CGM.getPointerAlign());
10210    InputInfo.SizesArray =
10211        Address(Info.SizesArray, CGM.getPointerAlign());
10212    MapTypesArray = Info.MapTypesArray;
10213    if (D.hasClausesOfKind<OMPDependClause>())
10214      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10215    else
10216      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10217  };
10218
10219  if (IfCond) {
10220    emitIfClause(CGF, IfCond, TargetThenGen,
10221                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10222  } else {
10223    RegionCodeGenTy ThenRCG(TargetThenGen);
10224    ThenRCG(CGF);
10225  }
10226}
10227
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  /// Determines the mangling letter emitted for the parameter in the
  /// vector-variant names (see emitX86DeclareSimdFunction and
  /// mangleVectorParameters below).
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector, i.e. an
    /// ordinary (non-uniform, non-linear) parameter.
    ParamKindTy Kind = Vector;
    /// For Linear: the constant step from the 'linear' clause; for
    /// LinearWithVarStride: the position of the parameter that supplies the
    /// stride. A zero value is treated as "not set" by the manglers.
    llvm::APSInt StrideOrArg;
    /// Alignment from the 'aligned' clause (or the default simd alignment);
    /// zero means no alignment was recorded.
    llvm::APSInt Alignment;
  };
} // namespace
10238
10239static unsigned evaluateCDTSize(const FunctionDecl *FD,
10240                                ArrayRef<ParamAttrTy> ParamAttrs) {
10241  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10242  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10243  // of that clause. The VLEN value must be power of 2.
10244  // In other case the notion of the function`s "characteristic data type" (CDT)
10245  // is used to compute the vector length.
10246  // CDT is defined in the following order:
10247  //   a) For non-void function, the CDT is the return type.
10248  //   b) If the function has any non-uniform, non-linear parameters, then the
10249  //   CDT is the type of the first such parameter.
10250  //   c) If the CDT determined by a) or b) above is struct, union, or class
10251  //   type which is pass-by-value (except for the type that maps to the
10252  //   built-in complex data type), the characteristic data type is int.
10253  //   d) If none of the above three cases is applicable, the CDT is int.
10254  // The VLEN is then determined based on the CDT and the size of vector
10255  // register of that ISA for which current vector version is generated. The
10256  // VLEN is computed using the formula below:
10257  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10258  // where vector register size specified in section 3.2.1 Registers and the
10259  // Stack Frame of original AMD64 ABI document.
10260  QualType RetType = FD->getReturnType();
10261  if (RetType.isNull())
10262    return 0;
10263  ASTContext &C = FD->getASTContext();
10264  QualType CDT;
10265  if (!RetType.isNull() && !RetType->isVoidType()) {
10266    CDT = RetType;
10267  } else {
10268    unsigned Offset = 0;
10269    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10270      if (ParamAttrs[Offset].Kind == Vector)
10271        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10272      ++Offset;
10273    }
10274    if (CDT.isNull()) {
10275      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10276        if (ParamAttrs[I + Offset].Kind == Vector) {
10277          CDT = FD->getParamDecl(I)->getType();
10278          break;
10279        }
10280      }
10281    }
10282  }
10283  if (CDT.isNull())
10284    CDT = C.IntTy;
10285  CDT = CDT->getCanonicalTypeUnqualified();
10286  if (CDT->isRecordType() || CDT->isUnionType())
10287    CDT = C.IntTy;
10288  return C.getTypeSize(CDT);
10289}
10290
10291static void
10292emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10293                           const llvm::APSInt &VLENVal,
10294                           ArrayRef<ParamAttrTy> ParamAttrs,
10295                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
10296  struct ISADataTy {
10297    char ISA;
10298    unsigned VecRegSize;
10299  };
10300  ISADataTy ISAData[] = {
10301      {
10302          'b', 128
10303      }, // SSE
10304      {
10305          'c', 256
10306      }, // AVX
10307      {
10308          'd', 256
10309      }, // AVX2
10310      {
10311          'e', 512
10312      }, // AVX512
10313  };
10314  llvm::SmallVector<char, 2> Masked;
10315  switch (State) {
10316  case OMPDeclareSimdDeclAttr::BS_Undefined:
10317    Masked.push_back('N');
10318    Masked.push_back('M');
10319    break;
10320  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10321    Masked.push_back('N');
10322    break;
10323  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10324    Masked.push_back('M');
10325    break;
10326  }
10327  for (char Mask : Masked) {
10328    for (const ISADataTy &Data : ISAData) {
10329      SmallString<256> Buffer;
10330      llvm::raw_svector_ostream Out(Buffer);
10331      Out << "_ZGV" << Data.ISA << Mask;
10332      if (!VLENVal) {
10333        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10334        assert(NumElts && "Non-zero simdlen/cdtsize expected");
10335        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10336      } else {
10337        Out << VLENVal;
10338      }
10339      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10340        switch (ParamAttr.Kind){
10341        case LinearWithVarStride:
10342          Out << 's' << ParamAttr.StrideOrArg;
10343          break;
10344        case Linear:
10345          Out << 'l';
10346          if (!!ParamAttr.StrideOrArg)
10347            Out << ParamAttr.StrideOrArg;
10348          break;
10349        case Uniform:
10350          Out << 'u';
10351          break;
10352        case Vector:
10353          Out << 'v';
10354          break;
10355        }
10356        if (!!ParamAttr.Alignment)
10357          Out << 'a' << ParamAttr.Alignment;
10358      }
10359      Out << '_' << Fn->getName();
10360      Fn->addFnAttr(Out.str());
10361    }
10362  }
10363}
10364
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10370
10371/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10372///
10373/// TODO: Need to implement the behavior for reference marked with a
10374/// var or no linear modifiers (1.b in the section). For this, we
10375/// need to extend ParamKindTy to support the linear modifiers.
10376static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10377  QT = QT.getCanonicalType();
10378
10379  if (QT->isVoidType())
10380    return false;
10381
10382  if (Kind == ParamKindTy::Uniform)
10383    return false;
10384
10385  if (Kind == ParamKindTy::Linear)
10386    return false;
10387
10388  // TODO: Handle linear references with modifiers
10389
10390  if (Kind == ParamKindTy::LinearWithVarStride)
10391    return false;
10392
10393  return true;
10394}
10395
10396/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10397static bool getAArch64PBV(QualType QT, ASTContext &C) {
10398  QT = QT.getCanonicalType();
10399  unsigned Size = C.getTypeSize(QT);
10400
10401  // Only scalars and complex within 16 bytes wide set PVB to true.
10402  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10403    return false;
10404
10405  if (QT->isFloatingType())
10406    return true;
10407
10408  if (QT->isIntegerType())
10409    return true;
10410
10411  if (QT->isPointerType())
10412    return true;
10413
10414  // TODO: Add support for complex types (section 3.1.2, item 2).
10415
10416  return false;
10417}
10418
10419/// Computes the lane size (LS) of a return type or of an input parameter,
10420/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10421/// TODO: Add support for references, section 3.2.1, item 1.
10422static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10423  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10424    QualType PTy = QT.getCanonicalType()->getPointeeType();
10425    if (getAArch64PBV(PTy, C))
10426      return C.getTypeSize(PTy);
10427  }
10428  if (getAArch64PBV(QT, C))
10429    return C.getTypeSize(QT);
10430
10431  return C.getTypeSize(C.getUIntPtrType());
10432}
10433
10434// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10435// signature of the scalar function, as defined in 3.2.2 of the
10436// AAVFABI.
10437static std::tuple<unsigned, unsigned, bool>
10438getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10439  QualType RetType = FD->getReturnType().getCanonicalType();
10440
10441  ASTContext &C = FD->getASTContext();
10442
10443  bool OutputBecomesInput = false;
10444
10445  llvm::SmallVector<unsigned, 8> Sizes;
10446  if (!RetType->isVoidType()) {
10447    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10448    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10449      OutputBecomesInput = true;
10450  }
10451  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10452    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10453    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10454  }
10455
10456  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10457  // The LS of a function parameter / return value can only be a power
10458  // of 2, starting from 8 bits, up to 128.
10459  assert(std::all_of(Sizes.begin(), Sizes.end(),
10460                     [](unsigned Size) {
10461                       return Size == 8 || Size == 16 || Size == 32 ||
10462                              Size == 64 || Size == 128;
10463                     }) &&
10464         "Invalid size");
10465
10466  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10467                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
10468                         OutputBecomesInput);
10469}
10470
10471/// Mangle the parameter part of the vector function name according to
10472/// their OpenMP classification. The mangling function is defined in
10473/// section 3.5 of the AAVFABI.
10474static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10475  SmallString<256> Buffer;
10476  llvm::raw_svector_ostream Out(Buffer);
10477  for (const auto &ParamAttr : ParamAttrs) {
10478    switch (ParamAttr.Kind) {
10479    case LinearWithVarStride:
10480      Out << "ls" << ParamAttr.StrideOrArg;
10481      break;
10482    case Linear:
10483      Out << 'l';
10484      // Don't print the step value if it is not present or if it is
10485      // equal to 1.
10486      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10487        Out << ParamAttr.StrideOrArg;
10488      break;
10489    case Uniform:
10490      Out << 'u';
10491      break;
10492    case Vector:
10493      Out << 'v';
10494      break;
10495    }
10496
10497    if (!!ParamAttr.Alignment)
10498      Out << 'a' << ParamAttr.Alignment;
10499  }
10500
10501  return Out.str();
10502}
10503
10504// Function used to add the attribute. The parameter `VLEN` is
10505// templated to allow the use of "x" when targeting scalable functions
10506// for SVE.
10507template <typename T>
10508static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10509                                 char ISA, StringRef ParSeq,
10510                                 StringRef MangledName, bool OutputBecomesInput,
10511                                 llvm::Function *Fn) {
10512  SmallString<256> Buffer;
10513  llvm::raw_svector_ostream Out(Buffer);
10514  Out << Prefix << ISA << LMask << VLEN;
10515  if (OutputBecomesInput)
10516    Out << "v";
10517  Out << ParSeq << "_" << MangledName;
10518  Fn->addFnAttr(Out.str());
10519}
10520
10521// Helper function to generate the Advanced SIMD names depending on
10522// the value of the NDS when simdlen is not present.
10523static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10524                                      StringRef Prefix, char ISA,
10525                                      StringRef ParSeq, StringRef MangledName,
10526                                      bool OutputBecomesInput,
10527                                      llvm::Function *Fn) {
10528  switch (NDS) {
10529  case 8:
10530    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10531                         OutputBecomesInput, Fn);
10532    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10533                         OutputBecomesInput, Fn);
10534    break;
10535  case 16:
10536    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10537                         OutputBecomesInput, Fn);
10538    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10539                         OutputBecomesInput, Fn);
10540    break;
10541  case 32:
10542    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10543                         OutputBecomesInput, Fn);
10544    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10545                         OutputBecomesInput, Fn);
10546    break;
10547  case 64:
10548  case 128:
10549    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10550                         OutputBecomesInput, Fn);
10551    break;
10552  default:
10553    llvm_unreachable("Scalar type is too wide.");
10554  }
10555}
10556
10557/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10558static void emitAArch64DeclareSimdFunction(
10559    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10560    ArrayRef<ParamAttrTy> ParamAttrs,
10561    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10562    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10563
10564  // Get basic data for building the vector signature.
10565  const auto Data = getNDSWDS(FD, ParamAttrs);
10566  const unsigned NDS = std::get<0>(Data);
10567  const unsigned WDS = std::get<1>(Data);
10568  const bool OutputBecomesInput = std::get<2>(Data);
10569
10570  // Check the values provided via `simdlen` by the user.
10571  // 1. A `simdlen(1)` doesn't produce vector signatures,
10572  if (UserVLEN == 1) {
10573    unsigned DiagID = CGM.getDiags().getCustomDiagID(
10574        DiagnosticsEngine::Warning,
10575        "The clause simdlen(1) has no effect when targeting aarch64.");
10576    CGM.getDiags().Report(SLoc, DiagID);
10577    return;
10578  }
10579
10580  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10581  // Advanced SIMD output.
10582  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10583    unsigned DiagID = CGM.getDiags().getCustomDiagID(
10584        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10585                                    "power of 2 when targeting Advanced SIMD.");
10586    CGM.getDiags().Report(SLoc, DiagID);
10587    return;
10588  }
10589
10590  // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10591  // limits.
10592  if (ISA == 's' && UserVLEN != 0) {
10593    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10594      unsigned DiagID = CGM.getDiags().getCustomDiagID(
10595          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10596                                      "lanes in the architectural constraints "
10597                                      "for SVE (min is 128-bit, max is "
10598                                      "2048-bit, by steps of 128-bit)");
10599      CGM.getDiags().Report(SLoc, DiagID) << WDS;
10600      return;
10601    }
10602  }
10603
10604  // Sort out parameter sequence.
10605  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10606  StringRef Prefix = "_ZGV";
10607  // Generate simdlen from user input (if any).
10608  if (UserVLEN) {
10609    if (ISA == 's') {
10610      // SVE generates only a masked function.
10611      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10612                           OutputBecomesInput, Fn);
10613    } else {
10614      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10615      // Advanced SIMD generates one or two functions, depending on
10616      // the `[not]inbranch` clause.
10617      switch (State) {
10618      case OMPDeclareSimdDeclAttr::BS_Undefined:
10619        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10620                             OutputBecomesInput, Fn);
10621        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10622                             OutputBecomesInput, Fn);
10623        break;
10624      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10625        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10626                             OutputBecomesInput, Fn);
10627        break;
10628      case OMPDeclareSimdDeclAttr::BS_Inbranch:
10629        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10630                             OutputBecomesInput, Fn);
10631        break;
10632      }
10633    }
10634  } else {
10635    // If no user simdlen is provided, follow the AAVFABI rules for
10636    // generating the vector length.
10637    if (ISA == 's') {
10638      // SVE, section 3.4.1, item 1.
10639      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10640                           OutputBecomesInput, Fn);
10641    } else {
10642      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10643      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10644      // two vector names depending on the use of the clause
10645      // `[not]inbranch`.
10646      switch (State) {
10647      case OMPDeclareSimdDeclAttr::BS_Undefined:
10648        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10649                                  OutputBecomesInput, Fn);
10650        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10651                                  OutputBecomesInput, Fn);
10652        break;
10653      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10654        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10655                                  OutputBecomesInput, Fn);
10656        break;
10657      case OMPDeclareSimdDeclAttr::BS_Inbranch:
10658        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10659                                  OutputBecomesInput, Fn);
10660        break;
10661      }
10662    }
10663  }
10664}
10665
10666void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10667                                              llvm::Function *Fn) {
10668  ASTContext &C = CGM.getContext();
10669  FD = FD->getMostRecentDecl();
10670  // Map params to their positions in function decl.
10671  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10672  if (isa<CXXMethodDecl>(FD))
10673    ParamPositions.try_emplace(FD, 0);
10674  unsigned ParamPos = ParamPositions.size();
10675  for (const ParmVarDecl *P : FD->parameters()) {
10676    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10677    ++ParamPos;
10678  }
10679  while (FD) {
10680    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10681      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10682      // Mark uniform parameters.
10683      for (const Expr *E : Attr->uniforms()) {
10684        E = E->IgnoreParenImpCasts();
10685        unsigned Pos;
10686        if (isa<CXXThisExpr>(E)) {
10687          Pos = ParamPositions[FD];
10688        } else {
10689          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10690                                ->getCanonicalDecl();
10691          Pos = ParamPositions[PVD];
10692        }
10693        ParamAttrs[Pos].Kind = Uniform;
10694      }
10695      // Get alignment info.
10696      auto NI = Attr->alignments_begin();
10697      for (const Expr *E : Attr->aligneds()) {
10698        E = E->IgnoreParenImpCasts();
10699        unsigned Pos;
10700        QualType ParmTy;
10701        if (isa<CXXThisExpr>(E)) {
10702          Pos = ParamPositions[FD];
10703          ParmTy = E->getType();
10704        } else {
10705          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10706                                ->getCanonicalDecl();
10707          Pos = ParamPositions[PVD];
10708          ParmTy = PVD->getType();
10709        }
10710        ParamAttrs[Pos].Alignment =
10711            (*NI)
10712                ? (*NI)->EvaluateKnownConstInt(C)
10713                : llvm::APSInt::getUnsigned(
10714                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10715                          .getQuantity());
10716        ++NI;
10717      }
10718      // Mark linear parameters.
10719      auto SI = Attr->steps_begin();
10720      auto MI = Attr->modifiers_begin();
10721      for (const Expr *E : Attr->linears()) {
10722        E = E->IgnoreParenImpCasts();
10723        unsigned Pos;
10724        if (isa<CXXThisExpr>(E)) {
10725          Pos = ParamPositions[FD];
10726        } else {
10727          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10728                                ->getCanonicalDecl();
10729          Pos = ParamPositions[PVD];
10730        }
10731        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10732        ParamAttr.Kind = Linear;
10733        if (*SI) {
10734          Expr::EvalResult Result;
10735          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10736            if (const auto *DRE =
10737                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10738              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10739                ParamAttr.Kind = LinearWithVarStride;
10740                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10741                    ParamPositions[StridePVD->getCanonicalDecl()]);
10742              }
10743            }
10744          } else {
10745            ParamAttr.StrideOrArg = Result.Val.getInt();
10746          }
10747        }
10748        ++SI;
10749        ++MI;
10750      }
10751      llvm::APSInt VLENVal;
10752      SourceLocation ExprLoc;
10753      const Expr *VLENExpr = Attr->getSimdlen();
10754      if (VLENExpr) {
10755        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10756        ExprLoc = VLENExpr->getExprLoc();
10757      }
10758      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10759      if (CGM.getTriple().isX86()) {
10760        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10761      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10762        unsigned VLEN = VLENVal.getExtValue();
10763        StringRef MangledName = Fn->getName();
10764        if (CGM.getTarget().hasFeature("sve"))
10765          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10766                                         MangledName, 's', 128, Fn, ExprLoc);
10767        if (CGM.getTarget().hasFeature("neon"))
10768          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10769                                         MangledName, 'n', 128, Fn, ExprLoc);
10770      }
10771    }
10772    FD = FD->getPreviousDecl();
10773  }
10774}
10775
namespace {
/// Cleanup action for doacross support.
/// Pushed on the EH stack by emitDoacrossInit so that the finalization
/// runtime entry point is called with a fixed pair of arguments (location
/// and thread id) on scope exit.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  // The finalization runtime function and the frozen copy of its arguments.
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    // Copy the arguments into owned storage; the ArrayRef's backing memory
    // may not outlive this cleanup object.
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the cleanup fires where codegen already ended.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
10800
// Emit the __kmpc_doacross_init call for loop directive \p D and arrange for
// the matching __kmpc_doacross_fini to run on scope exit via an EH cleanup.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (or reuse the cached) kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialized local array of dims; 'lo' stays 0 from the memset.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini as a cleanup so it runs on
  // both normal and exceptional exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10872
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  // Emits the runtime call for '#pragma omp ordered depend(source|sink : ...)'
  // in a doacross loop nest: the per-loop dependence vector is copied into a
  // temporary kmp_int64 array and passed to __kmpc_doacross_post (source) or
  // __kmpc_doacross_wait (sink).
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  // kmp_int64 cnt[<num_loops>]; - one slot per associated loop.
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    // Convert each loop counter expression to signed 64 bits as expected by
    // the runtime interface.
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: ident_t *loc, kmp_int32 gtid, kmp_int64 *vec.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
10903
10904void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10905                               llvm::FunctionCallee Callee,
10906                               ArrayRef<llvm::Value *> Args) const {
10907  assert(Loc.isValid() && "Outlined function call location must be valid.");
10908  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10909
10910  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10911    if (Fn->doesNotThrow()) {
10912      CGF.EmitNounwindRuntimeCall(Fn, Args);
10913      return;
10914    }
10915  }
10916  CGF.EmitRuntimeCall(Callee, Args);
10917}
10918
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Thin hook so device-specific runtimes can customize how outlined
  // functions are invoked; the base implementation simply forwards to
  // emitCall.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10924
10925void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10926  if (const auto *FD = dyn_cast<FunctionDecl>(D))
10927    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10928      HasEmittedDeclareTargetRegion = true;
10929}
10930
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // On the host the native and target representations of a parameter
  // coincide, so the native parameter's local address is returned directly;
  // device runtimes override this.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10936
10937namespace {
10938/// Cleanup action for allocate support.
10939class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10940public:
10941  static const int CleanupArgs = 3;
10942
10943private:
10944  llvm::FunctionCallee RTLFn;
10945  llvm::Value *Args[CleanupArgs];
10946
10947public:
10948  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10949                       ArrayRef<llvm::Value *> CallArgs)
10950      : RTLFn(RTLFn) {
10951    assert(CallArgs.size() == CleanupArgs &&
10952           "Size of arguments does not match.");
10953    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10954  }
10955  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10956    if (!CGF.HaveInsertPoint())
10957      return;
10958    CGF.EmitRuntimeCall(RTLFn, Args);
10959  }
10960};
10961} // namespace
10962
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  // For a variable carrying an OpenMP 'allocate' attribute with a
  // non-default allocator, allocates its storage through __kmpc_alloc and
  // registers a cleanup that frees it via __kmpc_free. Returns an invalid
  // address when ordinary (alloca) allocation should be used instead.
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is a runtime value, so the rounding to the
    // declared alignment must be done with emitted arithmetic.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: round up to the alignment at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Arrange for the storage to be released with __kmpc_free on scope exit,
  // including exceptional exits.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
11016
namespace {
/// One context selector of a 'declare variant' match clause: selector set,
/// selector kind, score, and the list of associated names (vendor names or
/// device kinds).
using OMPContextSelectorData =
    OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
/// The complete list of selectors from a single match clause.
using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
} // anonymous namespace
11022
/// Checks current context and returns true if it matches the context selector.
template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
          typename... Arguments>
static bool checkContext(const OMPContextSelectorData &Data,
                         Arguments... Params) {
  assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
         "Unknown context selector or context selector set.");
  // Primary template: a selector without a dedicated specialization below is
  // treated as not matching the current context.
  return false;
}
11032
11033/// Checks for implementation={vendor(<vendor>)} context selector.
11034/// \returns true iff <vendor>="llvm", false otherwise.
11035template <>
11036bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
11037    const OMPContextSelectorData &Data) {
11038  return llvm::all_of(Data.Names,
11039                      [](StringRef S) { return !S.compare_lower("llvm"); });
11040}
11041
/// Checks for device={kind(<kind>)} context selector.
/// \returns true if <kind>="host" and compilation is for host.
/// true if <kind>="nohost" and compilation is for device.
/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
/// false otherwise.
template <>
bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
    const OMPContextSelectorData &Data, CodeGenModule &CGM) {
  // Every listed kind must match; the first mismatch rejects the selector.
  for (StringRef Name : Data.Names) {
    // "host" matches iff this is not a device compilation.
    if (!Name.compare_lower("host")) {
      if (CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    // "nohost" matches iff this is a device compilation.
    if (!Name.compare_lower("nohost")) {
      if (!CGM.getLangOpts().OpenMPIsDevice)
        return false;
      continue;
    }
    // Remaining kinds ("cpu"/"gpu") are resolved from the target
    // architecture.
    switch (CGM.getTriple().getArch()) {
    // Architectures accepted as "cpu".
    case llvm::Triple::arm:
    case llvm::Triple::armeb:
    case llvm::Triple::aarch64:
    case llvm::Triple::aarch64_be:
    case llvm::Triple::aarch64_32:
    case llvm::Triple::ppc:
    case llvm::Triple::ppc64:
    case llvm::Triple::ppc64le:
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
      if (Name.compare_lower("cpu"))
        return false;
      break;
    // Architectures accepted as "gpu".
    case llvm::Triple::amdgcn:
    case llvm::Triple::nvptx:
    case llvm::Triple::nvptx64:
      if (Name.compare_lower("gpu"))
        return false;
      break;
    // All other architectures match neither "cpu" nor "gpu".
    case llvm::Triple::UnknownArch:
    case llvm::Triple::arc:
    case llvm::Triple::avr:
    case llvm::Triple::bpfel:
    case llvm::Triple::bpfeb:
    case llvm::Triple::hexagon:
    case llvm::Triple::mips:
    case llvm::Triple::mipsel:
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::msp430:
    case llvm::Triple::r600:
    case llvm::Triple::riscv32:
    case llvm::Triple::riscv64:
    case llvm::Triple::sparc:
    case llvm::Triple::sparcv9:
    case llvm::Triple::sparcel:
    case llvm::Triple::systemz:
    case llvm::Triple::tce:
    case llvm::Triple::tcele:
    case llvm::Triple::thumb:
    case llvm::Triple::thumbeb:
    case llvm::Triple::xcore:
    case llvm::Triple::le32:
    case llvm::Triple::le64:
    case llvm::Triple::amdil:
    case llvm::Triple::amdil64:
    case llvm::Triple::hsail:
    case llvm::Triple::hsail64:
    case llvm::Triple::spir:
    case llvm::Triple::spir64:
    case llvm::Triple::kalimba:
    case llvm::Triple::shave:
    case llvm::Triple::lanai:
    case llvm::Triple::wasm32:
    case llvm::Triple::wasm64:
    case llvm::Triple::renderscript32:
    case llvm::Triple::renderscript64:
    case llvm::Triple::ve:
      return false;
    }
  }
  return true;
}
11126
11127static bool matchesContext(CodeGenModule &CGM,
11128                           const CompleteOMPContextSelectorData &ContextData) {
11129  for (const OMPContextSelectorData &Data : ContextData) {
11130    switch (Data.Ctx) {
11131    case OMP_CTX_vendor:
11132      assert(Data.CtxSet == OMP_CTX_SET_implementation &&
11133             "Expected implementation context selector set.");
11134      if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
11135        return false;
11136      break;
11137    case OMP_CTX_kind:
11138      assert(Data.CtxSet == OMP_CTX_SET_device &&
11139             "Expected device context selector set.");
11140      if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
11141                                                                           CGM))
11142        return false;
11143      break;
11144    case OMP_CTX_unknown:
11145      llvm_unreachable("Unknown context selector kind.");
11146    }
11147  }
11148  return true;
11149}
11150
11151static CompleteOMPContextSelectorData
11152translateAttrToContextSelectorData(ASTContext &C,
11153                                   const OMPDeclareVariantAttr *A) {
11154  CompleteOMPContextSelectorData Data;
11155  for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
11156    Data.emplace_back();
11157    auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
11158        *std::next(A->ctxSelectorSets_begin(), I));
11159    auto Ctx = static_cast<OpenMPContextSelectorKind>(
11160        *std::next(A->ctxSelectors_begin(), I));
11161    Data.back().CtxSet = CtxSet;
11162    Data.back().Ctx = Ctx;
11163    const Expr *Score = *std::next(A->scores_begin(), I);
11164    Data.back().Score = Score->EvaluateKnownConstInt(C);
11165    switch (Ctx) {
11166    case OMP_CTX_vendor:
11167      assert(CtxSet == OMP_CTX_SET_implementation &&
11168             "Expected implementation context selector set.");
11169      Data.back().Names =
11170          llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
11171      break;
11172    case OMP_CTX_kind:
11173      assert(CtxSet == OMP_CTX_SET_device &&
11174             "Expected device context selector set.");
11175      Data.back().Names =
11176          llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
11177      break;
11178    case OMP_CTX_unknown:
11179      llvm_unreachable("Unknown context selector kind.");
11180    }
11181  }
11182  return Data;
11183}
11184
/// Returns true iff \p LHS is a strict (proper) subset of \p RHS: every
/// (selector set, selector) pair of LHS is present in RHS with all of LHS's
/// names contained in RHS's names, and LHS is not equal to RHS.
static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
                           const CompleteOMPContextSelectorData &RHS) {
  // Index RHS: (selector set, selector) -> set of associated names.
  llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
  for (const OMPContextSelectorData &D : RHS) {
    auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
    Pair.getSecond().insert(D.Names.begin(), D.Names.end());
  }
  bool AllSetsAreEqual = true;
  for (const OMPContextSelectorData &D : LHS) {
    auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
    // LHS uses a selector RHS does not have - not a subset.
    if (It == RHSData.end())
      return false;
    if (D.Names.size() > It->getSecond().size())
      return false;
    // set_union returns true iff it inserted something, i.e. LHS has a name
    // RHS lacks - not a subset.
    if (llvm::set_union(It->getSecond(), D.Names))
      return false;
    AllSetsAreEqual =
        AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
  }

  // Strictness: subset but not equal (fewer selectors, or some name set
  // strictly smaller).
  return LHS.size() != RHS.size() || !AllSetsAreEqual;
}
11207
11208static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11209                            const CompleteOMPContextSelectorData &RHS) {
11210  // Score is calculated as sum of all scores + 1.
11211  llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11212  bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11213  if (RHSIsSubsetOfLHS) {
11214    LHSScore = llvm::APSInt::get(0);
11215  } else {
11216    for (const OMPContextSelectorData &Data : LHS) {
11217      if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11218        LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11219      } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11220        LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11221      } else {
11222        LHSScore += Data.Score;
11223      }
11224    }
11225  }
11226  llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11227  if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11228    RHSScore = llvm::APSInt::get(0);
11229  } else {
11230    for (const OMPContextSelectorData &Data : RHS) {
11231      if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11232        RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11233      } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11234        RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11235      } else {
11236        RHSScore += Data.Score;
11237      }
11238    }
11239  }
11240  return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11241}
11242
11243/// Finds the variant function that matches current context with its context
11244/// selector.
11245static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11246                                                     const FunctionDecl *FD) {
11247  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11248    return FD;
11249  // Iterate through all DeclareVariant attributes and check context selectors.
11250  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
11251  CompleteOMPContextSelectorData TopMostData;
11252  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11253    CompleteOMPContextSelectorData Data =
11254        translateAttrToContextSelectorData(CGM.getContext(), A);
11255    if (!matchesContext(CGM, Data))
11256      continue;
11257    // If the attribute matches the context, find the attribute with the highest
11258    // score.
11259    if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
11260      TopMostAttr = A;
11261      TopMostData.swap(Data);
11262    }
11263  }
11264  if (!TopMostAttr)
11265    return FD;
11266  return cast<FunctionDecl>(
11267      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
11268          ->getDecl());
11269}
11270
bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  // Tries to emit the selected 'declare variant' function for \p GD instead
  // of the original. \returns true if a variant was emitted (or deferred),
  // false if the caller should emit the original function.
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
  // Emit original function if it does not have declare variant attribute or the
  // context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
    DeferredVariantFunction.erase(D);
    return true;
  }
  // The variant could not be emitted yet; remember the mapping so emission
  // can be retried later.
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}
11291
11292CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11293    CodeGenModule &CGM, const OMPLoopDirective &S)
11294    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11295  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11296  if (!NeedToPush)
11297    return;
11298  NontemporalDeclsSet &DS =
11299      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11300  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11301    for (const Stmt *Ref : C->private_refs()) {
11302      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11303      const ValueDecl *VD;
11304      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11305        VD = DRE->getDecl();
11306      } else {
11307        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11308        assert((ME->isImplicitCXXThis() ||
11309                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11310               "Expected member of current class.");
11311        VD = ME->getMemberDecl();
11312      }
11313      DS.insert(VD);
11314    }
11315  }
11316}
11317
11318CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11319  if (!NeedToPush)
11320    return;
11321  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11322}
11323
11324bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11325  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11326
11327  return llvm::any_of(
11328      CGM.getOpenMPRuntime().NontemporalDeclsStack,
11329      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11330}
11331
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when the directive actually has 'lastprivate(conditional:)'
      // clauses.
      NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                              [](const OMPLastprivateClause *C) {
                                return C->getKind() ==
                                       OMPC_LASTPRIVATE_conditional;
                              })) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  // Map each conditional lastprivate variable to a unique global name used
  // for its helper variables.
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqeName.try_emplace(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          generateUniqueName(CGM, "pl_cond", Ref));
    }
  }
  Data.IVLVal = IVLVal;
  // In simd only mode or for simd directives no need to generate threadprivate
  // references for the loop iteration counter, we can use the original one
  // since outlining cannot happen in simd regions.
  if (CGF.getLangOpts().OpenMPSimd ||
      isOpenMPSimdDirective(S.getDirectiveKind())) {
    Data.UseOriginalIV = true;
    return;
  }
  // Otherwise build a unique name for the IV's threadprivate twin from the
  // file's unique ID and the directive's source position.
  llvm::SmallString<16> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  PresumedLoc PLoc =
      CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();
  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
  Data.IVName = OS.str();
}
11378
11379CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11380  if (!NeedToPush)
11381    return;
11382  CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11383}
11384
void CGOpenMPRuntime::initLastprivateConditionalCounter(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Initializes the global twin of the loop iteration variable for a region
  // with 'lastprivate(conditional:)' clauses. Nothing to do in simd-only
  // mode or when the directive has no such clause.
  if (CGM.getLangOpts().OpenMPSimd ||
      !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                    [](const OMPLastprivateClause *C) {
                      return C->getKind() == OMPC_LASTPRIVATE_conditional;
                    }))
    return;
  const CGOpenMPRuntime::LastprivateConditionalData &Data =
      LastprivateConditionalStack.back();
  // When the original IV is used directly, no global copy is maintained.
  if (Data.UseOriginalIV)
    return;
  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv = iv;
  Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
      CGF, Data.IVLVal.getType(), Data.IVName);
  LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
  CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
}
11405
11406namespace {
11407/// Checks if the lastprivate conditional variable is referenced in LHS.
11408class LastprivateConditionalRefChecker final
11409    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11410  CodeGenFunction &CGF;
11411  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11412  const Expr *FoundE = nullptr;
11413  const Decl *FoundD = nullptr;
11414  StringRef UniqueDeclName;
11415  LValue IVLVal;
11416  StringRef IVName;
11417  SourceLocation Loc;
11418  bool UseOriginalIV = false;
11419
11420public:
11421  bool VisitDeclRefExpr(const DeclRefExpr *E) {
11422    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11423         llvm::reverse(LPM)) {
11424      auto It = D.DeclToUniqeName.find(E->getDecl());
11425      if (It == D.DeclToUniqeName.end())
11426        continue;
11427      FoundE = E;
11428      FoundD = E->getDecl()->getCanonicalDecl();
11429      UniqueDeclName = It->getSecond();
11430      IVLVal = D.IVLVal;
11431      IVName = D.IVName;
11432      UseOriginalIV = D.UseOriginalIV;
11433      break;
11434    }
11435    return FoundE == E;
11436  }
11437  bool VisitMemberExpr(const MemberExpr *E) {
11438    if (!CGF.IsWrappedCXXThis(E->getBase()))
11439      return false;
11440    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11441         llvm::reverse(LPM)) {
11442      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
11443      if (It == D.DeclToUniqeName.end())
11444        continue;
11445      FoundE = E;
11446      FoundD = E->getMemberDecl()->getCanonicalDecl();
11447      UniqueDeclName = It->getSecond();
11448      IVLVal = D.IVLVal;
11449      IVName = D.IVName;
11450      UseOriginalIV = D.UseOriginalIV;
11451      break;
11452    }
11453    return FoundE == E;
11454  }
11455  bool VisitStmt(const Stmt *S) {
11456    for (const Stmt *Child : S->children()) {
11457      if (!Child)
11458        continue;
11459      if (const auto *E = dyn_cast<Expr>(Child))
11460        if (!E->isGLValue())
11461          continue;
11462      if (Visit(Child))
11463        return true;
11464    }
11465    return false;
11466  }
11467  explicit LastprivateConditionalRefChecker(
11468      CodeGenFunction &CGF,
11469      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11470      : CGF(CGF), LPM(LPM) {}
11471  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11472  getFoundData() const {
11473    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11474                           UseOriginalIV);
11475  }
11476};
11477} // namespace
11478
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // If \p LHS references a lastprivate conditional variable, emits the
  // tracking code: when the current iteration is at least as late as the
  // last recorded one, store the iteration number and the variable's new
  // value into the corresponding global helpers.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  StringRef IVName;
  bool UseOriginalIV;
  // NOTE: FoundD is not used below; only the expression and helper names are.
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
      Checker.getFoundData();

  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // global_iv
  if (!UseOriginalIV) {
    Address IVAddr =
        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
  }
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal =
        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
    // (last_iv <= global_iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = global_iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal =
          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Protect the update with a critical section named after the variable.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
  }
}
11586
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  // At the end of the region, copies the globally tracked "last" value of
  // the conditional lastprivate variable \p VD back into its private copy.
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->getSecond();
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11605
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // SIMD-only codegen must never reach this entry point.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11611
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // SIMD-only codegen must never reach this entry point.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11617
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // SIMD-only codegen must never reach this entry point.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11625
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  // SIMD-only codegen must never reach this entry point.
  llvm_unreachable("Not supported in SIMD-only mode");
}
11633
// 'critical' regions need runtime lock support; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11640
// 'master' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11646
// 'taskyield' requires the tasking runtime; unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11651
// 'taskgroup' regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11657
// 'single' regions (including copyprivate handling) are unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11665
// 'ordered' regions (with the 'threads' modifier) are unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11672
// Explicit/implicit barriers need the threading runtime; unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11680
// Dynamic (dispatch-based) loop scheduling is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11687
// Static worksharing-loop initialization is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11693
// 'distribute' static scheduling is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11699
// End-of-ordered-iteration runtime notification is unsupported in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11706
// Static worksharing-loop finalization is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11712
// Fetching the next dynamically-scheduled chunk is unsupported in SIMD-only
// mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11720
// The 'num_threads' clause has no meaning without a threading runtime;
// unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11726
// The 'proc_bind' clause is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11732
// 'threadprivate' variable access requires runtime TLS support; unsupported
// in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11739
// Defining 'threadprivate' variables is unsupported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11745
// Compiler-generated (artificial) threadprivate storage is unsupported in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11750
// The 'flush' directive is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11756
// Task creation/launch is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11765
// 'taskloop' lowering is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11772
// Reductions in SIMD-only mode are lowered without runtime support: only the
// "simple" (single-thread, directly combined) form is legal here, and the
// base-class implementation handles that path, so we delegate to it.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11781
// Task-reduction bookkeeping is unsupported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11787
// Task-reduction fixups are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11794
// Looking up a task-reduction item is unsupported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11801
// 'taskwait' is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11806
// 'cancellation point' is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11812
// 'cancel' is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11818
// Outlining a 'target' region is unsupported in SIMD-only mode (no offload).
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11825
// Launching a 'target' region is unsupported in SIMD-only mode (no offload).
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11835
// Scanning declarations for device functions is unsupported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11839
// Device-side handling of global variables is unsupported in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11843
// SIMD-only mode never claims a global for target-specific emission.
// NOTE(review): returning false presumably tells the caller to emit the
// global through the normal (non-OpenMP) path — verify against the
// CGOpenMPRuntime::emitTargetGlobal contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11847
// Launching a 'teams' region is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11855
// The 'num_teams'/'thread_limit' clauses are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11862
// 'target data' mapping regions are unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11868
// Standalone target-data directives (enter/exit data, update) are unsupported
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11874
// Doacross (cross-iteration dependence) initialization is unsupported in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11880
// Doacross 'ordered depend' lowering is unsupported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11885
// Parameter translation for outlined target functions is unsupported in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11891
// Mapping a native parameter to its target-side address is unsupported in
// SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11898