1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "clang/AST/APValue.h"
19#include "clang/AST/Attr.h"
20#include "clang/AST/Decl.h"
21#include "clang/AST/OpenMPClause.h"
22#include "clang/AST/StmtOpenMP.h"
23#include "clang/AST/StmtVisitor.h"
24#include "clang/Basic/BitmaskEnum.h"
25#include "clang/Basic/FileManager.h"
26#include "clang/Basic/OpenMPKinds.h"
27#include "clang/Basic/SourceManager.h"
28#include "clang/CodeGen/ConstantInitBuilder.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/SetOperations.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Bitcode/BitcodeReader.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/GlobalValue.h"
36#include "llvm/IR/Value.h"
37#include "llvm/Support/AtomicOrdering.h"
38#include "llvm/Support/Format.h"
39#include "llvm/Support/raw_ostream.h"
40#include <cassert>
41#include <numeric>
42
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Holds the state common to every region kind: the kind of region, the
/// callback that emits the region body, the OpenMP directive that created
/// the region, and whether the region may contain a 'cancel' directive.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for an outlined region backed by captured statement
  /// \p CS.
  /// \param RegionKind Kind of the OpenMP region.
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive this region was created for.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without an associated captured statement (used for
  /// regions that reuse the enclosing function's capture information).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the dispatch point for untied tasks; a no-op by default, overridden
  /// by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: any captured-stmt info created with CR_OpenMP is a
  /// CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter that holds the global thread
  /// id; must not be null.
  /// \param HelperName Name to use for the generated outlined helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only parallel-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the generated outlined helper function.
  StringRef HelperName;
};
142
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits resume-dispatch machinery for untied tasks:
  /// a switch over the task "part id" so a task that was suspended can resume
  /// at the point where it previously stopped. For tied tasks (Untied ==
  /// false) every method is a no-op.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra codegen to run at each suspend point before returning.
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; one case per task part. Built lazily in Enter().
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination just returns from the task part.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspend/resume point: store the next part id, run the
    /// user-supplied codegen, branch out through the return block, and
    /// register the resume block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id is the next free switch-case number.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one switch case per part).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Delegates to the untied-task action (no-op for tied tasks).
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: matches only task-outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing OpenMP region info
/// (OuterRegionInfo) when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info that was active before this
  /// inlined region was entered; restored by InlinedOpenMPRegionRAII.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Return the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI: matches only inlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the enclosing
  /// captured-statement info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name for the generated target-region function.
  StringRef HelperName;
};
343
/// Placeholder codegen callback for regions that must never emit a body
/// (used by CGOpenMPInnerExprInfo, which only evaluates captured expressions).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  /// \param CS Captured statement whose non-local variable captures are
  /// privatized so that the expression codegen can address them directly.
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already directly addressable.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Not captured by an enclosing region - use the variable directly.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // This info is never matched via LLVM-style RTTI; it exists only while a
  // single expression is being emitted.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map; swapped back in the destructor when
  /// NoInheritance is set.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// If true, lambda/block capture info is hidden from the inlined region
  /// for the lifetime of this RAII object.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the lambda/block capture state so the inlined region does not
      // see it; restored in the destructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (intentionally the same value as
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478
namespace {
// Enable bitwise operators (|, &, ^, ...) on the bitmask enums declared in
// this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids passed to the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Indexes of the fields above, used with struct-GEPs when building ident_t
/// values.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Schedule used when no schedule clause is present.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is run when the cleanup fires; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to do if the IR insertion point has already been cleared.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
591
592} // anonymous namespace
593
594void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
595  CodeGenFunction::RunCleanupsScope Scope(CGF);
596  if (PrePostAction) {
597    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
598    Callback(CodeGen, CGF, *PrePostAction);
599  } else {
600    PrePostActionTy Action;
601    Callback(CodeGen, CGF, Action);
602  }
603}
604
605/// Check if the combiner is a call to UDR combiner and if it is so return the
606/// UDR decl used for reduction.
607static const OMPDeclareReductionDecl *
608getReductionInit(const Expr *ReductionOp) {
609  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611      if (const auto *DRE =
612              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614          return DRD;
615  return nullptr;
616}
617
/// Initialize \p Private either via the user-defined reduction initializer
/// attached to \p DRD (when present) or with a null constant of type \p Ty.
/// \param DRD Declare reduction declaration supplying the initializer.
/// \param InitOp Call to the UDR initializer; its two arguments are
/// privatized to \p Private and \p Original before emission.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Emit the UDR initializer: map the LHS/RHS of the init call to the
    // private and original addresses, then evaluate the call.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the initializer function and emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a null constant in a private global and
    // copy it into Private according to the type's evaluation kind.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are stored directly from the lvalue; no RValue round-trip.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true if \p Init must be emitted through
/// the user-defined reduction initializer (emitInitWithReductionInitializer).
/// \param Init Initial expression of array.
/// \param DRD Declare reduction declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // SrcBegin is only needed when a UDR initializer reads the original array.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
762
/// Emit an LValue for the shared (original) reduction expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
766
767LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768                                            const Expr *E) {
769  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771  return LValue();
772}
773
774void ReductionCodeGen::emitAggregateInitialization(
775    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
776    const OMPDeclareReductionDecl *DRD) {
777  // Emit VarDecl with copy init for arrays.
778  // Get the address of the original variable captured in current
779  // captured region.
780  const auto *PrivateVD =
781      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
782  bool EmitDeclareReductionInit =
783      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
784  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
785                       EmitDeclareReductionInit,
786                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
787                                                : PrivateVD->getInit(),
788                       DRD, SharedLVal.getAddress(CGF));
789}
790
791ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792                                   ArrayRef<const Expr *> Origs,
793                                   ArrayRef<const Expr *> Privates,
794                                   ArrayRef<const Expr *> ReductionOps) {
795  ClausesData.reserve(Shareds.size());
796  SharedAddresses.reserve(Shareds.size());
797  Sizes.reserve(Shareds.size());
798  BaseDecls.reserve(Shareds.size());
799  const auto *IOrig = Origs.begin();
800  const auto *IPriv = Privates.begin();
801  const auto *IRed = ReductionOps.begin();
802  for (const Expr *Ref : Shareds) {
803    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804    std::advance(IOrig, 1);
805    std::advance(IPriv, 1);
806    std::advance(IRed, 1);
807  }
808}
809
810void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812         "Number of generated lvalues must be exactly N.");
813  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815  SharedAddresses.emplace_back(First, Second);
816  if (ClausesData[N].Shared == ClausesData[N].Ref) {
817    OrigAddresses.emplace_back(First, Second);
818  } else {
819    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821    OrigAddresses.emplace_back(First, Second);
822  }
823}
824
// Compute the size of reduction item \p N (in chars and, for variably
// modified types, in elements), cache it in Sizes, and emit the variably
// modified private type with that size bound to its VLA size expression.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size is known statically; no element count
    // needs to be tracked (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB - LB) + 1 from the cached
    // lower/upper bound pointers; byte size is count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: take the byte size and divide by the element size to
    // recover the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque VLA size expression to the computed element count while
  // the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
861
862void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
863                                         llvm::Value *Size) {
864  const auto *PrivateVD =
865      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
866  QualType PrivateType = PrivateVD->getType();
867  if (!PrivateType->isVariablyModifiedType()) {
868    assert(!Size && !Sizes[N].second &&
869           "Size should be nullptr for non-variably modified reduction "
870           "items.");
871    return;
872  }
873  CodeGenFunction::OpaqueValueMapping OpaqueMap(
874      CGF,
875      cast<OpaqueValueExpr>(
876          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
877      RValue::get(Size));
878  CGF.EmitVariablyModifiedType(PrivateType);
879}
880
// Emit the initializer for the private copy of reduction item \p N into
// \p PrivateAddr, choosing between aggregate initialization, a user-defined
// ('declare reduction') initializer, or the private variable's own
// initializer. \p DefaultInit returns true when it already emitted an
// initializer itself.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Adjust both addresses to the memory representation of the involved types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array-typed private copy: element-wise aggregate initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer (or no private init).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit did not already emit one.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
914
915bool ReductionCodeGen::needCleanups(unsigned N) {
916  const auto *PrivateVD =
917      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
918  QualType PrivateType = PrivateVD->getType();
919  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
920  return DTorKind != QualType::DK_none;
921}
922
923void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
924                                    Address PrivateAddr) {
925  const auto *PrivateVD =
926      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927  QualType PrivateType = PrivateVD->getType();
928  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
929  if (needCleanups(N)) {
930    PrivateAddr = CGF.Builder.CreateElementBitCast(
931        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
932    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
933  }
934}
935
// Walk the pointer/reference indirections of \p BaseTy, loading through each
// level starting from \p BaseLV, until the type matches \p ElTy (or no
// indirection remains); then return an lvalue whose address is bitcast to
// the memory representation of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference type: wrap the address in a reference lvalue and load it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Preserve the original base info/TBAA while retyping the final address.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
955
// Rebuild a chain of pointer temporaries mirroring the indirections of
// \p BaseTy down to \p ElTy, store \p Addr (cast to the innermost pointer
// type) at the bottom of the chain, and return the address of the outermost
// temporary. When \p BaseTy has no such indirections, \p Addr is returned
// directly with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary created so far
  Address TopTmp = Address::invalid();     // previous level, receives the next
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each stores the address of the
    // level below it.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Anchor the cast address at the bottom of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
983
984static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
985  const VarDecl *OrigVD = nullptr;
986  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
987    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
988    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
989      Base = TempOASE->getBase()->IgnoreParenImpCasts();
990    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991      Base = TempASE->getBase()->IgnoreParenImpCasts();
992    DE = cast<DeclRefExpr>(Base);
993    OrigVD = cast<VarDecl>(DE->getDecl());
994  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
995    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
996    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
997      Base = TempASE->getBase()->IgnoreParenImpCasts();
998    DE = cast<DeclRefExpr>(Base);
999    OrigVD = cast<VarDecl>(DE->getDecl());
1000  }
1001  return OrigVD;
1002}
1003
1004Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1005                                               Address PrivateAddr) {
1006  const DeclRefExpr *DE;
1007  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1008    BaseDecls.emplace_back(OrigVD);
1009    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1010    LValue BaseLValue =
1011        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1012                    OriginalBaseLValue);
1013    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1014        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1015    llvm::Value *PrivatePointer =
1016        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1017            PrivateAddr.getPointer(),
1018            SharedAddresses[N].first.getAddress(CGF).getType());
1019    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1020    return castToBase(CGF, OrigVD->getType(),
1021                      SharedAddresses[N].first.getType(),
1022                      OriginalBaseLValue.getAddress(CGF).getType(),
1023                      OriginalBaseLValue.getAlignment(), Ptr);
1024  }
1025  BaseDecls.emplace_back(
1026      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1027  return PrivateAddr;
1028}
1029
1030bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1031  const OMPDeclareReductionDecl *DRD =
1032      getReductionInit(ClausesData[N].ReductionOp);
1033  return DRD && DRD->getInitializer();
1034}
1035
1036LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1037  return CGF.EmitLoadOfPointerLValue(
1038      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039      getThreadIDVariable()->getType()->castAs<PointerType>());
1040}
1041
// Emit the body of the OpenMP region, wrapped in a terminate scope so that
// no exception (or longjmp) can escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce this by emitting the body inside a terminate EH scope.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1056
1057LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1058    CodeGenFunction &CGF) {
1059  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1060                            getThreadIDVariable()->getType(),
1061                            AlignmentSource::Decl);
1062}
1063
1064static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1065                                       QualType FieldTy) {
1066  auto *Field = FieldDecl::Create(
1067      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1068      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1069      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1070  Field->setAccess(AS_public);
1071  DC->addDecl(Field);
1072  return Field;
1073}
1074
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Critical-section names are modeled as [8 x i32] lock storage.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up any offloading entry info recorded in module metadata.
  loadOffloadInfoMetadata();
}
1085
1086void CGOpenMPRuntime::clear() {
1087  InternalVars.clear();
1088  // Clean non-target variable declarations possibly used only in debug info.
1089  for (const auto &Data : EmittedNonTargetVariables) {
1090    if (!Data.getValue().pointsToAliveValue())
1091      continue;
1092    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1093    if (!GV)
1094      continue;
1095    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1096      continue;
1097    GV->eraseFromParent();
1098  }
1099}
1100
1101std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1102  SmallString<128> Buffer;
1103  llvm::raw_svector_ostream OS(Buffer);
1104  StringRef Sep = FirstSeparator;
1105  for (StringRef Part : Parts) {
1106    OS << Sep << Part;
1107    Sep = Separator;
1108  }
1109  return std::string(OS.str());
1110}
1111
// Emit the outlined combiner or initializer function for a user-defined
// ('declare reduction') reduction over type \p Ty. The generated function
// has signature "void fn(Ty *omp_out/priv, Ty *omp_in/orig)"; inside it the
// declared variables \p In and \p Out are privatized to the pointees of the
// two parameters before \p CombinerInitializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These small helpers are always worth inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers, the 'omp_priv' variable may carry its own initializer;
  // emit it into the out parameter before the initializer expression runs.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1168
// Emit (once per declaration) the combiner and, if present, the initializer
// functions for the user-defined reduction \p D, and cache them in UDRMap.
// When \p CGF is non-null the declaration is also registered against the
// current function for later cleanup bookkeeping.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For direct-init ('omp_priv(...)') the initializer is emitted via the
    // out variable's own init, so no expression is passed here.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1194
1195std::pair<llvm::Function *, llvm::Function *>
1196CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1197  auto I = UDRMap.find(D);
1198  if (I != UDRMap.end())
1199    return I->second;
1200  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1201  return UDRMap.lookup(D);
1202}
1203
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present. The constructor pushes a finalization callback
// for the given directive kind; the destructor pops it. A null \p OMPBuilder
// makes both operations no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    // Undo the push performed in the constructor (if any).
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1248
1249static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1250    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1251    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1252    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1253  assert(ThreadIDVar->getType()->isPointerType() &&
1254         "thread id variable must be of type kmp_int32 *");
1255  CodeGenFunction CGF(CGM, true);
1256  bool HasCancel = false;
1257  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1258    HasCancel = OPD->hasCancel();
1259  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1260    HasCancel = OPD->hasCancel();
1261  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1262    HasCancel = OPSD->hasCancel();
1263  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1264    HasCancel = OPFD->hasCancel();
1265  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1266    HasCancel = OPFD->hasCancel();
1267  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1268    HasCancel = OPFD->hasCancel();
1269  else if (const auto *OPFD =
1270               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1271    HasCancel = OPFD->hasCancel();
1272  else if (const auto *OPFD =
1273               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1274    HasCancel = OPFD->hasCancel();
1275
1276  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1277  //       parallel region to make cancellation barriers work properly.
1278  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1279  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1280  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1281                                    HasCancel, OutlinedHelperName);
1282  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1283  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1284}
1285
1286llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290  return emitParallelOrTeamsOutlinedFunction(
1291      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292}
1293
1294llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298  return emitParallelOrTeamsOutlinedFunction(
1299      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300}
1301
// Outline the captured statement of a 'task' or 'taskloop' region. For
// untied tasks, wires up an action that re-enqueues the task via
// __kmpc_omp_task at each part boundary and reports the number of generated
// parts through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen emitted at untied-task part boundaries: call
  // __kmpc_omp_task(loc, tid, task_t*) to reschedule the remainder.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Part counting only applies to untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348
1349static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                             const RecordDecl *RD, const CGRecordLayout &RL,
1351                             ArrayRef<llvm::Constant *> Data) {
1352  llvm::StructType *StructTy = RL.getLLVMType();
1353  unsigned PrevIdx = 0;
1354  ConstantInitBuilder CIBuilder(CGM);
1355  auto DI = Data.begin();
1356  for (const FieldDecl *FD : RD->fields()) {
1357    unsigned Idx = RL.getLLVMFieldNo(FD);
1358    // Fill the alignment.
1359    for (unsigned I = PrevIdx; I < Idx; ++I)
1360      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361    PrevIdx = Idx + 1;
1362    Fields.add(*DI);
1363    ++DI;
1364  }
1365}
1366
1367template <class... As>
1368static llvm::GlobalVariable *
1369createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371                   As &&... Args) {
1372  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374  ConstantInitBuilder CIBuilder(CGM);
1375  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376  buildStructValue(Fields, CGM, RD, RL, Data);
1377  return Fields.finishAndCreateGlobal(
1378      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379      std::forward<As>(Args)...);
1380}
1381
1382template <typename T>
1383static void
1384createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                         ArrayRef<llvm::Constant *> Data,
1386                                         T &Parent) {
1387  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390  buildStructValue(Fields, CGM, RD, RL, Data);
1391  Fields.finishAndAddTo(Parent);
1392}
1393
// Create a dummy marker instruction (a no-op bitcast of undef) that records
// where location/thread-id service code may be inserted for CGF.CurFn.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker at the builder's current block.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Place the marker right after the alloca insertion point so service
    // code lands near the top of the function.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1409
1410void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412  if (Elem.second.ServiceInsertPt) {
1413    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414    Elem.second.ServiceInsertPt = nullptr;
1415    Ptr->eraseFromParent();
1416  }
1417}
1418
1419static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                  SourceLocation Loc,
1421                                                  SmallString<128> &Buffer) {
1422  llvm::raw_svector_ostream OS(Buffer);
1423  // Build debug location
1424  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425  OS << ";" << PLoc.getFilename() << ";";
1426  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427    OS << FD->getQualifiedNameAsString();
1428  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429  return OS.str();
1430}
1431
1432llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                 SourceLocation Loc,
1434                                                 unsigned Flags) {
1435  llvm::Constant *SrcLocStr;
1436  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437      Loc.isInvalid()) {
1438    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439  } else {
1440    std::string FunctionName = "";
1441    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442      FunctionName = FD->getQualifiedNameAsString();
1443    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444    const char *FileName = PLoc.getFilename();
1445    unsigned Line = PLoc.getLine();
1446    unsigned Column = PLoc.getColumn();
1447    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                Line, Column);
1449  }
1450  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                     Reserved2Flags);
1453}
1454
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Returns the OpenMP global thread id for the current function, caching it
  // per-function in OpenMPLocThreadIDMap where possible.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Reuse the thread-id argument only when it is safe: either no landing
      // pads / C++ exceptions are in play, or the load happens in (or its
      // pointer lives in) the entry block or the current block, so the value
      // cannot be read on an unreachable exception path.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point (entry block) so the
  // cached value dominates all later uses; the guard restores the previous
  // insertion point afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522
1523void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526    clearLocThreadIdInsertPt(CGF);
1527    OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528  }
1529  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530    for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531      UDRMap.erase(D);
1532    FunctionUDRMap.erase(CGF.CurFn);
1533  }
1534  auto I = FunctionUDMMap.find(CGF.CurFn);
1535  if (I != FunctionUDMMap.end()) {
1536    for(const auto *D : I->second)
1537      UDMMap.erase(D);
1538    FunctionUDMMap.erase(I);
1539  }
1540  LastprivateConditionalToTypes.erase(CGF.CurFn);
1541  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542}
1543
1544llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1545  return OMPBuilder.IdentPtr;
1546}
1547
1548llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549  if (!Kmpc_MicroTy) {
1550    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554  }
1555  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556}
1557
1558llvm::FunctionCallee
1559CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560  assert((IVSize == 32 || IVSize == 64) &&
1561         "IV size is not compatible with the omp runtime");
1562  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                            : "__kmpc_for_static_init_4u")
1564                                : (IVSigned ? "__kmpc_for_static_init_8"
1565                                            : "__kmpc_for_static_init_8u");
1566  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568  llvm::Type *TypeParams[] = {
1569    getIdentTyPointerTy(),                     // loc
1570    CGM.Int32Ty,                               // tid
1571    CGM.Int32Ty,                               // schedtype
1572    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573    PtrTy,                                     // p_lower
1574    PtrTy,                                     // p_upper
1575    PtrTy,                                     // p_stride
1576    ITy,                                       // incr
1577    ITy                                        // chunk
1578  };
1579  auto *FnTy =
1580      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581  return CGM.CreateRuntimeFunction(FnTy, Name);
1582}
1583
1584llvm::FunctionCallee
1585CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586  assert((IVSize == 32 || IVSize == 64) &&
1587         "IV size is not compatible with the omp runtime");
1588  StringRef Name =
1589      IVSize == 32
1590          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                               CGM.Int32Ty,           // tid
1595                               CGM.Int32Ty,           // schedtype
1596                               ITy,                   // lower
1597                               ITy,                   // upper
1598                               ITy,                   // stride
1599                               ITy                    // chunk
1600  };
1601  auto *FnTy =
1602      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603  return CGM.CreateRuntimeFunction(FnTy, Name);
1604}
1605
1606llvm::FunctionCallee
1607CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608  assert((IVSize == 32 || IVSize == 64) &&
1609         "IV size is not compatible with the omp runtime");
1610  StringRef Name =
1611      IVSize == 32
1612          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614  llvm::Type *TypeParams[] = {
1615      getIdentTyPointerTy(), // loc
1616      CGM.Int32Ty,           // tid
1617  };
1618  auto *FnTy =
1619      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620  return CGM.CreateRuntimeFunction(FnTy, Name);
1621}
1622
1623llvm::FunctionCallee
1624CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625  assert((IVSize == 32 || IVSize == 64) &&
1626         "IV size is not compatible with the omp runtime");
1627  StringRef Name =
1628      IVSize == 32
1629          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633  llvm::Type *TypeParams[] = {
1634    getIdentTyPointerTy(),                     // loc
1635    CGM.Int32Ty,                               // tid
1636    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637    PtrTy,                                     // p_lower
1638    PtrTy,                                     // p_upper
1639    PtrTy                                      // p_stride
1640  };
1641  auto *FnTy =
1642      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643  return CGM.CreateRuntimeFunction(FnTy, Name);
1644}
1645
1646/// Obtain information that uniquely identifies a target entry. This
1647/// consists of the file and device IDs as well as line number associated with
1648/// the relevant entry source location.
1649static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650                                     unsigned &DeviceID, unsigned &FileID,
1651                                     unsigned &LineNum) {
1652  SourceManager &SM = C.getSourceManager();
1653
1654  // The loc should be always valid and have a file ID (the user cannot use
1655  // #pragma directives in macros)
1656
1657  assert(Loc.isValid() && "Source location is expected to be always valid.");
1658
1659  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660  assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661
1662  llvm::sys::fs::UniqueID ID;
1663  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665    assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667      SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668          << PLoc.getFilename() << EC.message();
1669  }
1670
1671  DeviceID = ID.getDevice();
1672  FileID = ID.getFile();
1673  LineNum = PLoc.getLine();
1674}
1675
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Returns the address of the reference pointer for a declare-target
  // variable, or an invalid Address when no such pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is used for 'link' clauses, and for 'to' clauses when
  // unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Pointer name: "<mangled>[_<fileID>]_decl_tgt_ref_ptr"; the file ID
    // disambiguates internal-linkage variables across translation units.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Create the global lazily, only on first use.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side gets a static initializer pointing at the variable.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1714
1715llvm::Constant *
1716CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1717  assert(!CGM.getLangOpts().OpenMPUseTLS ||
1718         !CGM.getContext().getTargetInfo().isTLSSupported());
1719  // Lookup the entry, lazily creating it if necessary.
1720  std::string Suffix = getName({"cache", ""});
1721  return getOrCreateInternalVariable(
1722      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1723}
1724
1725Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726                                                const VarDecl *VD,
1727                                                Address VDAddr,
1728                                                SourceLocation Loc) {
1729  if (CGM.getLangOpts().OpenMPUseTLS &&
1730      CGM.getContext().getTargetInfo().isTLSSupported())
1731    return VDAddr;
1732
1733  llvm::Type *VarTy = VDAddr.getElementType();
1734  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736                                                       CGM.Int8PtrTy),
1737                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738                         getOrCreateThreadPrivateCache(VD)};
1739  return Address(CGF.EmitRuntimeCall(
1740                     OMPBuilder.getOrCreateRuntimeFunction(
1741                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742                     Args),
1743                 VDAddr.getAlignment());
1744}
1745
1746void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750  // library.
1751  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1752  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1753                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1754                      OMPLoc);
1755  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1756  // to register constructor/destructor for variable.
1757  llvm::Value *Args[] = {
1758      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1759      Ctor, CopyCtor, Dtor};
1760  CGF.EmitRuntimeCall(
1761      OMPBuilder.getOrCreateRuntimeFunction(
1762          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1763      Args);
1764}
1765
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Emits (if needed) the ctor/dtor registration for threadprivate variable
  // VD. When no CGF is given, a standalone init function is created and
  // returned; otherwise the registration is emitted into *CGF and nullptr is
  // returned. Also returns nullptr when native TLS is used or no registration
  // is required.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit only once per mangled name across the module.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Its single void* parameter is
      // the address of the thread's copy.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and retype it to the variable's type
      // before emitting the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Its single void* parameter is the address of the
      // thread's copy.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // A typed null stands in for a missing constructor.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // A typed null stands in for a missing destructor.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No active function: wrap the registration into a standalone init
      // function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1885
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Emits ctor/dtor offload entries for a declare-target variable and
  // registers them with the offload-entries manager. Returns
  // LangOpts.OpenMPIsDevice whenever device-side handling applies (so the
  // caller knows whether the default host emission should be suppressed).
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables and 'to' variables under unified shared memory are
  // handled through the reference-pointer mechanism, not here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit only once per mangled name across the module.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; it is referenced from the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a placeholder global is needed to serve as the
      // entry's unique ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is referenced from the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder global serving as the entry's unique ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2000
2001Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2002                                                          QualType VarType,
2003                                                          StringRef Name) {
2004  std::string Suffix = getName({"artificial", ""});
2005  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2006  llvm::Value *GAddr =
2007      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2008  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2009      CGM.getTarget().isTLSSupported()) {
2010    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
2011    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
2012  }
2013  std::string CacheSuffix = getName({"cache", ""});
2014  llvm::Value *Args[] = {
2015      emitUpdateLocation(CGF, SourceLocation()),
2016      getThreadID(CGF, SourceLocation()),
2017      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2018      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2019                                /*isSigned=*/false),
2020      getOrCreateInternalVariable(
2021          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2022  return Address(
2023      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2024          CGF.EmitRuntimeCall(
2025              OMPBuilder.getOrCreateRuntimeFunction(
2026                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2027              Args),
2028          VarLVType->getPointerTo(/*AddrSpace=*/0)),
2029      CGM.getContext().getTypeAlignInChars(VarType));
2030}
2031
2032void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2033                                   const RegionCodeGenTy &ThenGen,
2034                                   const RegionCodeGenTy &ElseGen) {
2035  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2036
2037  // If the condition constant folds and can be elided, try to avoid emitting
2038  // the condition and the dead arm of the if/else.
2039  bool CondConstant;
2040  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2041    if (CondConstant)
2042      ThenGen(CGF);
2043    else
2044      ElseGen(CGF);
2045    return;
2046  }
2047
2048  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2049  // emit the conditional branch.
2050  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2051  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2052  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2053  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2054
2055  // Emit the 'then' code.
2056  CGF.EmitBlock(ThenBlock);
2057  ThenGen(CGF);
2058  CGF.EmitBranch(ContBlock);
2059  // Emit the 'else' code if present.
2060  // There is no need to emit line number for unconditional branch.
2061  (void)ApplyDebugLocation::CreateEmpty(CGF);
2062  CGF.EmitBlock(ElseBlock);
2063  ElseGen(CGF);
2064  // There is no need to emit line number for unconditional branch.
2065  (void)ApplyDebugLocation::CreateEmpty(CGF);
2066  CGF.EmitBranch(ContBlock);
2067  // Emit the continuation block for code after the if.
2068  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2069}
2070
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  // Emits the call sequence for a 'parallel' region: __kmpc_fork_call with
  // the outlined function when the region runs in parallel, or a serialized
  // sequence (__kmpc_serialized_parallel / direct call /
  // __kmpc_end_serialized_parallel) when the if-clause selects serial
  // execution.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones calles in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, pick the arm at runtime (or fold it); otherwise the
  // parallel path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2140
2141// If we're inside an (outlined) parallel region, use the region info's
2142// thread-ID variable (it is passed in a first argument of the outlined function
2143// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144// regular serial code region, get thread ID by calling kmp_int32
2145// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146// return the address of that temp.
2147Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2148                                             SourceLocation Loc) {
2149  if (auto *OMPRegionInfo =
2150          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2151    if (OMPRegionInfo->getThreadIDVariable())
2152      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2153
2154  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2155  QualType Int32Ty =
2156      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2157  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2158  CGF.EmitStoreOfScalar(ThreadID,
2159                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2160
2161  return ThreadIDTemp;
2162}
2163
2164llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2165    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2166  SmallString<256> Buffer;
2167  llvm::raw_svector_ostream Out(Buffer);
2168  Out << Name;
2169  StringRef RuntimeName = Out.str();
2170  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2171  if (Elem.second) {
2172    assert(Elem.second->getType()->getPointerElementType() == Ty &&
2173           "OMP internal variable has different type than requested");
2174    return &*Elem.second;
2175  }
2176
2177  return Elem.second = new llvm::GlobalVariable(
2178             CGM.getModule(), Ty, /*IsConstant*/ false,
2179             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2180             Elem.first(), /*InsertBefore=*/nullptr,
2181             llvm::GlobalValue::NotThreadLocal, AddressSpace);
2182}
2183
2184llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186  std::string Name = getName({Prefix, "var"});
2187  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188}
2189
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Emits EnterCallee(EnterArgs) before the region body and
/// ExitCallee(ExitArgs) on the way out. When \p Conditional is set, the
/// enter call's result guards the region ("if (enter()) { body; exit(); }")
/// and the caller must invoke Done() after emitting the region so the
/// continuation block is terminated.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, if Conditional, the guarding branch on its
  /// (nonzero) result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional opened by Enter(). Only meaningful when the
  /// action was constructed with Conditional == true, since ContBlock is
  /// created only on that path.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2228
2229void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2230                                         StringRef CriticalName,
2231                                         const RegionCodeGenTy &CriticalOpGen,
2232                                         SourceLocation Loc, const Expr *Hint) {
2233  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2234  // CriticalOpGen();
2235  // __kmpc_end_critical(ident_t *, gtid, Lock);
2236  // Prepare arguments and build a call to __kmpc_critical
2237  if (!CGF.HaveInsertPoint())
2238    return;
2239  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2240                         getCriticalRegionLock(CriticalName)};
2241  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2242                                                std::end(Args));
2243  if (Hint) {
2244    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2245        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2246  }
2247  CommonActionTy Action(
2248      OMPBuilder.getOrCreateRuntimeFunction(
2249          CGM.getModule(),
2250          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2251      EnterArgs,
2252      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2253                                            OMPRTL___kmpc_end_critical),
2254      Args);
2255  CriticalOpGen.setAction(Action);
2256  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2257}
2258
2259void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2260                                       const RegionCodeGenTy &MasterOpGen,
2261                                       SourceLocation Loc) {
2262  if (!CGF.HaveInsertPoint())
2263    return;
2264  // if(__kmpc_master(ident_t *, gtid)) {
2265  //   MasterOpGen();
2266  //   __kmpc_end_master(ident_t *, gtid);
2267  // }
2268  // Prepare arguments and build a call to __kmpc_master
2269  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2270  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2271                            CGM.getModule(), OMPRTL___kmpc_master),
2272                        Args,
2273                        OMPBuilder.getOrCreateRuntimeFunction(
2274                            CGM.getModule(), OMPRTL___kmpc_end_master),
2275                        Args,
2276                        /*Conditional=*/true);
2277  MasterOpGen.setAction(Action);
2278  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2279  Action.Done(CGF);
2280}
2281
2282void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2283                                       const RegionCodeGenTy &MaskedOpGen,
2284                                       SourceLocation Loc, const Expr *Filter) {
2285  if (!CGF.HaveInsertPoint())
2286    return;
2287  // if(__kmpc_masked(ident_t *, gtid, filter)) {
2288  //   MaskedOpGen();
2289  //   __kmpc_end_masked(iden_t *, gtid);
2290  // }
2291  // Prepare arguments and build a call to __kmpc_masked
2292  llvm::Value *FilterVal = Filter
2293                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2294                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2295  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2296                         FilterVal};
2297  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2298                            getThreadID(CGF, Loc)};
2299  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300                            CGM.getModule(), OMPRTL___kmpc_masked),
2301                        Args,
2302                        OMPBuilder.getOrCreateRuntimeFunction(
2303                            CGM.getModule(), OMPRTL___kmpc_end_masked),
2304                        ArgsEnd,
2305                        /*Conditional=*/true);
2306  MaskedOpGen.setAction(Action);
2307  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2308  Action.Done(CGF);
2309}
2310
2311void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2312                                        SourceLocation Loc) {
2313  if (!CGF.HaveInsertPoint())
2314    return;
2315  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2316    OMPBuilder.createTaskyield(CGF.Builder);
2317  } else {
2318    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2319    llvm::Value *Args[] = {
2320        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2321        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2322    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2323                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2324                        Args);
2325  }
2326
2327  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2328    Region->emitUntiedSwitch(CGF);
2329}
2330
2331void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2332                                          const RegionCodeGenTy &TaskgroupOpGen,
2333                                          SourceLocation Loc) {
2334  if (!CGF.HaveInsertPoint())
2335    return;
2336  // __kmpc_taskgroup(ident_t *, gtid);
2337  // TaskgroupOpGen();
2338  // __kmpc_end_taskgroup(ident_t *, gtid);
2339  // Prepare arguments and build a call to __kmpc_taskgroup
2340  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2341  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2342                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
2343                        Args,
2344                        OMPBuilder.getOrCreateRuntimeFunction(
2345                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2346                        Args);
2347  TaskgroupOpGen.setAction(Action);
2348  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2349}
2350
2351/// Given an array of pointers to variables, project the address of a
2352/// given variable.
2353static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2354                                      unsigned Index, const VarDecl *Var) {
2355  // Pull out the pointer to the variable.
2356  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2357  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2358
2359  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2360  Addr = CGF.Builder.CreateElementBitCast(
2361      Addr, CGF.ConvertTypeForMem(Var->getType()));
2362  return Addr;
2363}
2364
/// Emit the helper "void .omp.copyprivate.copy_func(void *LHSArg, void
/// *RHSArg)" passed to __kmpc_copyprivate. Both arguments are arrays of
/// void* pointing at the copyprivate variables; the helper assigns each
/// source element to the corresponding destination via the provided
/// assignment expressions.
///
/// \param ArgsType pointer type of the void*[n] argument arrays.
/// \param CopyprivateVars variables listed in the copyprivate clause.
/// \param DestExprs destination pseudo-variable references per assignment.
/// \param SrcExprs source pseudo-variable references per assignment.
/// \param AssignmentOps assignment expressions, one per variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // The helper is internal: it is only referenced through the
  // __kmpc_copyprivate call emitted in this TU.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the opaque void* parameters as the typed pointer arrays:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2418
/// Emit a 'single' region, optionally followed by the copyprivate broadcast.
///
/// The region body and __kmpc_end_single are guarded by the result of
/// __kmpc_single. When copyprivate variables are present, a did_it flag set
/// inside the guarded region tells __kmpc_copyprivate which thread executed
/// the single and is therefore the broadcast source.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; (still inside the guarded block, so only the thread that
    // executed the single region sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2506
2507void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2508                                        const RegionCodeGenTy &OrderedOpGen,
2509                                        SourceLocation Loc, bool IsThreads) {
2510  if (!CGF.HaveInsertPoint())
2511    return;
2512  // __kmpc_ordered(ident_t *, gtid);
2513  // OrderedOpGen();
2514  // __kmpc_end_ordered(ident_t *, gtid);
2515  // Prepare arguments and build a call to __kmpc_ordered
2516  if (IsThreads) {
2517    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2518    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2519                              CGM.getModule(), OMPRTL___kmpc_ordered),
2520                          Args,
2521                          OMPBuilder.getOrCreateRuntimeFunction(
2522                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2523                          Args);
2524    OrderedOpGen.setAction(Action);
2525    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2526    return;
2527  }
2528  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2529}
2530
2531unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2532  unsigned Flags;
2533  if (Kind == OMPD_for)
2534    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2535  else if (Kind == OMPD_sections)
2536    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2537  else if (Kind == OMPD_single)
2538    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2539  else if (Kind == OMPD_barrier)
2540    Flags = OMP_IDENT_BARRIER_EXPL;
2541  else
2542    Flags = OMP_IDENT_BARRIER_IMPL;
2543  return Flags;
2544}
2545
2546void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2547    CodeGenFunction &CGF, const OMPLoopDirective &S,
2548    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2549  // Check if the loop directive is actually a doacross loop directive. In this
2550  // case choose static, 1 schedule.
2551  if (llvm::any_of(
2552          S.getClausesOfKind<OMPOrderedClause>(),
2553          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2554    ScheduleKind = OMPC_SCHEDULE_static;
2555    // Chunk size is 1 in this case.
2556    llvm::APInt ChunkSize(32, 1);
2557    ChunkExpr = IntegerLiteral::Create(
2558        CGF.getContext(), ChunkSize,
2559        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2560        SourceLocation());
2561  }
2562}
2563
/// Emit an implicit or explicit OpenMP barrier.
///
/// Emits __kmpc_barrier, or __kmpc_cancel_barrier inside a cancellable
/// region. \p EmitChecks requests a branch to the construct's cancellation
/// exit when the cancel barrier returns nonzero; \p ForceSimpleCall forces
/// the plain barrier even inside cancellable regions.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The ident_t flags encode which construct this barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2613
2614/// Map the OpenMP loop schedule to the runtime enumeration.
2615static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2616                                          bool Chunked, bool Ordered) {
2617  switch (ScheduleKind) {
2618  case OMPC_SCHEDULE_static:
2619    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2620                   : (Ordered ? OMP_ord_static : OMP_sch_static);
2621  case OMPC_SCHEDULE_dynamic:
2622    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2623  case OMPC_SCHEDULE_guided:
2624    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2625  case OMPC_SCHEDULE_runtime:
2626    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2627  case OMPC_SCHEDULE_auto:
2628    return Ordered ? OMP_ord_auto : OMP_sch_auto;
2629  case OMPC_SCHEDULE_unknown:
2630    assert(!Chunked && "chunk was specified but schedule kind not known");
2631    return Ordered ? OMP_ord_static : OMP_sch_static;
2632  }
2633  llvm_unreachable("Unexpected runtime schedule");
2634}
2635
2636/// Map the OpenMP distribute schedule to the runtime enumeration.
2637static OpenMPSchedType
2638getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2639  // only static is allowed for dist_schedule
2640  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2641}
2642
2643bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2644                                         bool Chunked) const {
2645  OpenMPSchedType Schedule =
2646      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2647  return Schedule == OMP_sch_static;
2648}
2649
2650bool CGOpenMPRuntime::isStaticNonchunked(
2651    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2652  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2653  return Schedule == OMP_dist_sch_static;
2654}
2655
2656bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                      bool Chunked) const {
2658  OpenMPSchedType Schedule =
2659      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660  return Schedule == OMP_sch_static_chunked;
2661}
2662
2663bool CGOpenMPRuntime::isStaticChunked(
2664    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666  return Schedule == OMP_dist_sch_static_chunked;
2667}
2668
2669bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2670  OpenMPSchedType Schedule =
2671      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2672  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2673  return Schedule != OMP_sch_static;
2674}
2675
2676static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2677                                  OpenMPScheduleClauseModifier M1,
2678                                  OpenMPScheduleClauseModifier M2) {
2679  int Modifier = 0;
2680  switch (M1) {
2681  case OMPC_SCHEDULE_MODIFIER_monotonic:
2682    Modifier = OMP_sch_modifier_monotonic;
2683    break;
2684  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2685    Modifier = OMP_sch_modifier_nonmonotonic;
2686    break;
2687  case OMPC_SCHEDULE_MODIFIER_simd:
2688    if (Schedule == OMP_sch_static_chunked)
2689      Schedule = OMP_sch_static_balanced_chunked;
2690    break;
2691  case OMPC_SCHEDULE_MODIFIER_last:
2692  case OMPC_SCHEDULE_MODIFIER_unknown:
2693    break;
2694  }
2695  switch (M2) {
2696  case OMPC_SCHEDULE_MODIFIER_monotonic:
2697    Modifier = OMP_sch_modifier_monotonic;
2698    break;
2699  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700    Modifier = OMP_sch_modifier_nonmonotonic;
2701    break;
2702  case OMPC_SCHEDULE_MODIFIER_simd:
2703    if (Schedule == OMP_sch_static_chunked)
2704      Schedule = OMP_sch_static_balanced_chunked;
2705    break;
2706  case OMPC_SCHEDULE_MODIFIER_last:
2707  case OMPC_SCHEDULE_MODIFIER_unknown:
2708    break;
2709  }
2710  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2711  // If the static schedule kind is specified or if the ordered clause is
2712  // specified, and if the nonmonotonic modifier is not specified, the effect is
2713  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2714  // modifier is specified, the effect is as if the nonmonotonic modifier is
2715  // specified.
2716  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2717    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2718          Schedule == OMP_sch_static_balanced_chunked ||
2719          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2720          Schedule == OMP_dist_sch_static_chunked ||
2721          Schedule == OMP_dist_sch_static))
2722      Modifier = OMP_sch_modifier_nonmonotonic;
2723  }
2724  return Schedule | Modifier;
2725}
2726
2727void CGOpenMPRuntime::emitForDispatchInit(
2728    CodeGenFunction &CGF, SourceLocation Loc,
2729    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2730    bool Ordered, const DispatchRTInput &DispatchValues) {
2731  if (!CGF.HaveInsertPoint())
2732    return;
2733  OpenMPSchedType Schedule = getRuntimeSchedule(
2734      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2735  assert(Ordered ||
2736         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2737          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2738          Schedule != OMP_sch_static_balanced_chunked));
2739  // Call __kmpc_dispatch_init(
2740  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2741  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2742  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2743
2744  // If the Chunk was not specified in the clause - use default value 1.
2745  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2746                                            : CGF.Builder.getIntN(IVSize, 1);
2747  llvm::Value *Args[] = {
2748      emitUpdateLocation(CGF, Loc),
2749      getThreadID(CGF, Loc),
2750      CGF.Builder.getInt32(addMonoNonMonoModifier(
2751          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2752      DispatchValues.LB,                                     // Lower
2753      DispatchValues.UB,                                     // Upper
2754      CGF.Builder.getIntN(IVSize, 1),                        // Stride
2755      Chunk                                                  // Chunk
2756  };
2757  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2758}
2759
/// Emit the call to the given __kmpc_for_static_init* entry point for a
/// statically scheduled worksharing loop or distribute construct.
///
/// \param ForStaticInitFunction the size/signedness-specific runtime
///        function to call.
/// \param Schedule the static runtime schedule variant; must be one of the
///        static kinds (asserted below).
/// \param M1, M2 schedule modifiers folded into the schedule argument.
/// \param Values bounds/stride addresses, IV width, and optional chunk.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops use the dispatch path instead; only static schedules are
  // valid here.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2808
/// Emit static-schedule initialization for a worksharing directive (loop or
/// sections): maps the schedule clause to a runtime schedule kind and
/// forwards to emitForStaticInitCall with the clause's modifiers.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  // Tag the location with the kind of worksharing region so the runtime can
  // distinguish loops from sections.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Attach an artificial debug location so the runtime call is not blamed on
  // user code.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2829
2830void CGOpenMPRuntime::emitDistributeStaticInit(
2831    CodeGenFunction &CGF, SourceLocation Loc,
2832    OpenMPDistScheduleClauseKind SchedKind,
2833    const CGOpenMPRuntime::StaticRTInput &Values) {
2834  OpenMPSchedType ScheduleNum =
2835      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2836  llvm::Value *UpdatedLocation =
2837      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2838  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839  llvm::FunctionCallee StaticInitFunction =
2840      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2841  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2843                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
2844}
2845
2846void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2847                                          SourceLocation Loc,
2848                                          OpenMPDirectiveKind DKind) {
2849  if (!CGF.HaveInsertPoint())
2850    return;
2851  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2852  llvm::Value *Args[] = {
2853      emitUpdateLocation(CGF, Loc,
2854                         isOpenMPDistributeDirective(DKind)
2855                             ? OMP_IDENT_WORK_DISTRIBUTE
2856                             : isOpenMPLoopDirective(DKind)
2857                                   ? OMP_IDENT_WORK_LOOP
2858                                   : OMP_IDENT_WORK_SECTIONS),
2859      getThreadID(CGF, Loc)};
2860  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2861  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2863                      Args);
2864}
2865
2866void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2867                                                 SourceLocation Loc,
2868                                                 unsigned IVSize,
2869                                                 bool IVSigned) {
2870  if (!CGF.HaveInsertPoint())
2871    return;
2872  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2873  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2874  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2875}
2876
/// Emit a call to __kmpc_dispatch_next_* asking the runtime for the next
/// chunk of a dynamically scheduled loop. The runtime updates the
/// last-iteration flag and the lower/upper/stride variables in place.
/// Returns an i1 value that is true while more work is available.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; convert it to a boolean for the loop
  // condition.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2900
2901void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2902                                           llvm::Value *NumThreads,
2903                                           SourceLocation Loc) {
2904  if (!CGF.HaveInsertPoint())
2905    return;
2906  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2907  llvm::Value *Args[] = {
2908      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2909      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2910  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2911                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2912                      Args);
2913}
2914
/// Push the proc_bind policy for the next parallel region to the runtime,
/// implementing the proc_bind clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      // Pass the proc_bind kind as an integer constant.
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
2929
2930void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2931                                SourceLocation Loc, llvm::AtomicOrdering AO) {
2932  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2933    OMPBuilder.createFlush(CGF.Builder);
2934  } else {
2935    if (!CGF.HaveInsertPoint())
2936      return;
2937    // Build call void __kmpc_flush(ident_t *loc)
2938    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                            CGM.getModule(), OMPRTL___kmpc_flush),
2940                        emitUpdateLocation(CGF, Loc));
2941  }
2942}
2943
namespace {
/// Indexes of fields for type kmp_task_t. The order must match the field
/// order of the kmp_task_t record built in createKmpTaskTRecordDecl below.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (First kmp_cmplrdata_t union field.)
  Data1,
  /// Task priority.
  /// (Second kmp_cmplrdata_t union field.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2969
2970bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2971  return OffloadEntriesTargetRegion.empty() &&
2972         OffloadEntriesDeviceGlobalVar.empty();
2973}
2974
/// Initialize target region entry.
/// Pre-creates an entry (with null address/ID) at the given position so that
/// device codegen can later register the actual outlined function for it.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // Address and ID are filled in later by registerTargetRegionEntryInfo.
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}
2988
/// Register the address/ID of an emitted target region entry. On the device,
/// the entry must already have been initialized from the host metadata; on
/// the host, a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // This could happen if the device compilation is invoked standalone.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
      return;
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, silently ignore a duplicate registration of a plain
    // target region entry (IgnoreAddressId lets an already-registered entry
    // match); any other duplicate is a bug.
    if (Flags ==
            OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
        hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
                                 /*IgnoreAddressId*/ true))
      return;
    assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
           "Target region entry already registered!");
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3018
3019bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3020    unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3021    bool IgnoreAddressId) const {
3022  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3023  if (PerDevice == OffloadEntriesTargetRegion.end())
3024    return false;
3025  auto PerFile = PerDevice->second.find(FileID);
3026  if (PerFile == PerDevice->second.end())
3027    return false;
3028  auto PerParentName = PerFile->second.find(ParentName);
3029  if (PerParentName == PerFile->second.end())
3030    return false;
3031  auto PerLine = PerParentName->second.find(LineNum);
3032  if (PerLine == PerParentName->second.end())
3033    return false;
3034  // Fail if this entry is already registered.
3035  if (!IgnoreAddressId &&
3036      (PerLine->second.getAddress() || PerLine->second.getID()))
3037    return false;
3038  return true;
3039}
3040
3041void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3042    const OffloadTargetRegionEntryInfoActTy &Action) {
3043  // Scan all target region entries and perform the provided action.
3044  for (const auto &D : OffloadEntriesTargetRegion)
3045    for (const auto &F : D.second)
3046      for (const auto &P : F.second)
3047        for (const auto &L : P.second)
3048          Action(D.first, F.first, P.first(), L.first, L.second);
3049}
3050
/// Pre-create a device global variable entry (device compilation only); the
/// address, size and linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3061
3062void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3063    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3064                                     CharUnits VarSize,
3065                                     OMPTargetGlobalVarEntryKind Flags,
3066                                     llvm::GlobalValue::LinkageTypes Linkage) {
3067  if (CGM.getLangOpts().OpenMPIsDevice) {
3068    // This could happen if the device compilation is invoked standalone.
3069    if (!hasDeviceGlobalVarEntryInfo(VarName))
3070      return;
3071    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3072    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3073      if (Entry.getVarSize().isZero()) {
3074        Entry.setVarSize(VarSize);
3075        Entry.setLinkage(Linkage);
3076      }
3077      return;
3078    }
3079    Entry.setVarSize(VarSize);
3080    Entry.setLinkage(Linkage);
3081    Entry.setAddress(Addr);
3082  } else {
3083    if (hasDeviceGlobalVarEntryInfo(VarName)) {
3084      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3085      assert(Entry.isValid() && Entry.getFlags() == Flags &&
3086             "Entry not initialized!");
3087      if (Entry.getVarSize().isZero()) {
3088        Entry.setVarSize(VarSize);
3089        Entry.setLinkage(Linkage);
3090      }
3091      return;
3092    }
3093    OffloadEntriesDeviceGlobalVar.try_emplace(
3094        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3095    ++OffloadingEntriesNum;
3096  }
3097}
3098
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}
3106
/// Emit one __tgt_offload_entry global describing an offloading entity
/// (target region or declare-target variable). The struct layout matches
/// getTgtOffloadEntryQTy: {addr, name, size, flags, reserved}. The entry is
/// placed in the "omp_offloading_entries" section so the linker collects all
/// entries into a contiguous table for the offload runtime.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  // Allow the linker to merge identical name strings.
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)}; // reserved field
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}
3137
/// Emit the __tgt_offload_entry globals for all recorded offloading entries
/// and the !omp_offload.info metadata that the device compilation reads via
/// loadOffloadInfoMetadata(). Also diagnoses entries whose address or ID was
/// never registered.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are placed at the index equal to their creation order so the
  // device sees them in the same order as the host created them.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file unique IDs back to a FileEntry.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now that all entries are collected in creation order, emit the actual
  // __tgt_offload_entry globals (and diagnose incomplete entries).
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3311
/// Loads all the offload entries information from the host IR
/// metadata. Only meaningful for device compilations that were given a host
/// IR file (-fopenmp-host-ir-file-path); parses its !omp_offload.info node
/// and pre-initializes the offload entries manager from it.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the metadata is read.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand 0 is the entry kind; the remaining operand layout depends on
    // the kind (see createOffloadEntriesAndInfoMetadata).
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3380
3381void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3382  if (!KmpRoutineEntryPtrTy) {
3383    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3384    ASTContext &C = CGM.getContext();
3385    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3386    FunctionProtoType::ExtProtoInfo EPI;
3387    KmpRoutineEntryPtrQTy = C.getPointerType(
3388        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3389    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3390  }
3391}
3392
/// Lazily build (and cache) the QualType for __tgt_offload_entry, the record
/// emitted per offloading entry by createOffloadEntry. The layout must match
/// the struct the offload runtime library expects.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Pack the record so it matches the runtime's fixed layout exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}
3421
namespace {
/// Describes one privatized entity of a task/taskloop: either a clause-based
/// private (with original reference, private copy and optional firstprivate
/// initializer) or a bare local variable promoted into the task's private
/// data.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for untied-task locals: only the original decl is tracked.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry was built via the single-argument constructor,
  /// i.e. it is a local variable rather than a privatization clause item.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// A privatized entity paired with its required alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3439
3440static bool isAllocatableDecl(const VarDecl *VD) {
3441  const VarDecl *CVD = VD->getCanonicalDecl();
3442  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3443    return false;
3444  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3445  // Use the default allocation.
3446  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3447            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3448           !AA->getAllocator());
3449}
3450
3451static RecordDecl *
3452createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3453  if (!Privates.empty()) {
3454    ASTContext &C = CGM.getContext();
3455    // Build struct .kmp_privates_t. {
3456    //         /*  private vars  */
3457    //       };
3458    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3459    RD->startDefinition();
3460    for (const auto &Pair : Privates) {
3461      const VarDecl *VD = Pair.second.Original;
3462      QualType Type = VD->getType().getNonReferenceType();
3463      // If the private variable is a local variable with lvalue ref type,
3464      // allocate the pointer instead of the pointee type.
3465      if (Pair.second.isLocalPrivate()) {
3466        if (VD->getType()->isLValueReferenceType())
3467          Type = C.getPointerType(Type);
3468        if (isAllocatableDecl(VD))
3469          Type = C.getPointerType(Type);
3470      }
3471      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3472      if (VD->hasAttrs()) {
3473        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3474             E(VD->getAttrs().end());
3475             I != E; ++I)
3476          FD->addAttr(*I);
3477      }
3478    }
3479    RD->completeDefinition();
3480    return RD;
3481  }
3482  return nullptr;
3483}
3484
3485static RecordDecl *
3486createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3487                         QualType KmpInt32Ty,
3488                         QualType KmpRoutineEntryPointerQTy) {
3489  ASTContext &C = CGM.getContext();
3490  // Build struct kmp_task_t {
3491  //         void *              shareds;
3492  //         kmp_routine_entry_t routine;
3493  //         kmp_int32           part_id;
3494  //         kmp_cmplrdata_t data1;
3495  //         kmp_cmplrdata_t data2;
3496  // For taskloops additional fields:
3497  //         kmp_uint64          lb;
3498  //         kmp_uint64          ub;
3499  //         kmp_int64           st;
3500  //         kmp_int32           liter;
3501  //         void *              reductions;
3502  //       };
3503  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3504  UD->startDefinition();
3505  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3506  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3507  UD->completeDefinition();
3508  QualType KmpCmplrdataTy = C.getRecordType(UD);
3509  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3510  RD->startDefinition();
3511  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3512  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3513  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3515  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3516  if (isOpenMPTaskLoopDirective(Kind)) {
3517    QualType KmpUInt64Ty =
3518        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3519    QualType KmpInt64Ty =
3520        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3521    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3522    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3523    addFieldToRecordDecl(C, RD, KmpInt64Ty);
3524    addFieldToRecordDecl(C, RD, KmpInt32Ty);
3525    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3526  }
3527  RD->completeDefinition();
3528  return RD;
3529}
3530
3531static RecordDecl *
3532createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3533                                     ArrayRef<PrivateDataTy> Privates) {
3534  ASTContext &C = CGM.getContext();
3535  // Build struct kmp_task_t_with_privates {
3536  //         kmp_task_t task_data;
3537  //         .kmp_privates_t. privates;
3538  //       };
3539  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3540  RD->startDefinition();
3541  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3542  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3543    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3544  RD->completeDefinition();
3545  return RD;
3546}
3547
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the proxy's signature: kmp_int32 (kmp_int32 gtid,
  // kmp_task_t_with_privates *__restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the 'tt' argument to reach the task descriptor.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // 'Base' is the embedded kmp_task_t (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds struct type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates as void*, or a null pointer if no privates record exists.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to both tasks and taskloops.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded
    // from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3662
/// Emits a function that destroys the privatized fields of a task. It takes
/// the same (gtid, task) parameters as the task entry, walks the fields of
/// the privates record (the second field of kmp_task_t_with_privates), and
/// pushes a destroy cleanup for every field whose type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument; then step to the privates record, which is
  // the second field of kmp_task_t_with_privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a destroy cleanup only for fields that actually need destruction.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3711
3712/// Emit a privates mapping function for correct handling of private and
3713/// firstprivate variables.
3714/// \code
3715/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3716/// **noalias priv1,...,  <tyn> **noalias privn) {
3717///   *priv1 = &.privates.priv1;
3718///   ...;
3719///   *privn = &.privates.privn;
3720/// }
3721/// \endcode
3722static llvm::Value *
3723emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3724                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
3725                               ArrayRef<PrivateDataTy> Privates) {
3726  ASTContext &C = CGM.getContext();
3727  FunctionArgList Args;
3728  ImplicitParamDecl TaskPrivatesArg(
3729      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3731      ImplicitParamDecl::Other);
3732  Args.push_back(&TaskPrivatesArg);
3733  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3734  unsigned Counter = 1;
3735  for (const Expr *E : Data.PrivateVars) {
3736    Args.push_back(ImplicitParamDecl::Create(
3737        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3738        C.getPointerType(C.getPointerType(E->getType()))
3739            .withConst()
3740            .withRestrict(),
3741        ImplicitParamDecl::Other));
3742    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3743    PrivateVarsPos[VD] = Counter;
3744    ++Counter;
3745  }
3746  for (const Expr *E : Data.FirstprivateVars) {
3747    Args.push_back(ImplicitParamDecl::Create(
3748        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3749        C.getPointerType(C.getPointerType(E->getType()))
3750            .withConst()
3751            .withRestrict(),
3752        ImplicitParamDecl::Other));
3753    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3754    PrivateVarsPos[VD] = Counter;
3755    ++Counter;
3756  }
3757  for (const Expr *E : Data.LastprivateVars) {
3758    Args.push_back(ImplicitParamDecl::Create(
3759        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760        C.getPointerType(C.getPointerType(E->getType()))
3761            .withConst()
3762            .withRestrict(),
3763        ImplicitParamDecl::Other));
3764    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765    PrivateVarsPos[VD] = Counter;
3766    ++Counter;
3767  }
3768  for (const VarDecl *VD : Data.PrivateLocals) {
3769    QualType Ty = VD->getType().getNonReferenceType();
3770    if (VD->getType()->isLValueReferenceType())
3771      Ty = C.getPointerType(Ty);
3772    if (isAllocatableDecl(VD))
3773      Ty = C.getPointerType(Ty);
3774    Args.push_back(ImplicitParamDecl::Create(
3775        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3777        ImplicitParamDecl::Other));
3778    PrivateVarsPos[VD] = Counter;
3779    ++Counter;
3780  }
3781  const auto &TaskPrivatesMapFnInfo =
3782      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3783  llvm::FunctionType *TaskPrivatesMapTy =
3784      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3785  std::string Name =
3786      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3787  auto *TaskPrivatesMap = llvm::Function::Create(
3788      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3789      &CGM.getModule());
3790  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3791                                    TaskPrivatesMapFnInfo);
3792  if (CGM.getLangOpts().Optimize) {
3793    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3794    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3795    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3796  }
3797  CodeGenFunction CGF(CGM);
3798  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3799                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
3800
3801  // *privi = &.privates.privi;
3802  LValue Base = CGF.EmitLoadOfPointerLValue(
3803      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3804      TaskPrivatesArg.getType()->castAs<PointerType>());
3805  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3806  Counter = 0;
3807  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3808    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3809    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3810    LValue RefLVal =
3811        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3812    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3813        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3814    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3815    ++Counter;
3816  }
3817  CGF.FinishFunction();
3818  return TaskPrivatesMap;
3819}
3820
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the captured shareds (may be invalid).
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param ForDup True when emitting the body of the task_dup function, in
/// which case firstprivates are initialized from the source task's shareds
/// and only non-trivial constructor-based initializers are re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the task_dup path only non-trivial constructor-based initializers
    // are emitted; everything else was already copied by the runtime.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: locate the shared source lvalue to initialize from.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // task_dup: read the value from the source task's shareds.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or block: emit it directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: privatize the init element to the shared
          // address, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just emit the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3942
3943/// Check if duplication function is required for taskloops.
3944static bool checkInitIsRequired(CodeGenFunction &CGF,
3945                                ArrayRef<PrivateDataTy> Privates) {
3946  bool InitRequired = false;
3947  for (const PrivateDataTy &Pair : Privates) {
3948    if (Pair.second.isLocalPrivate())
3949      continue;
3950    const VarDecl *VD = Pair.second.PrivateCopy;
3951    const Expr *Init = VD->getAnyInitializer();
3952    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3953                                    !CGF.isTrivialInitializer(Init));
3954    if (InitRequired)
3955      break;
3956  }
3957  return InitRequired;
3958}
3959
3960
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void .omp_task_dup.(task *dst, task *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are initialized from the *source* task's shareds pointer.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4039
4040/// Checks if destructor function is required to be generated.
4041/// \return true if cleanups are required, false otherwise.
4042static bool
4043checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4044                         ArrayRef<PrivateDataTy> Privates) {
4045  for (const PrivateDataTy &P : Privates) {
4046    if (P.second.isLocalPrivate())
4047      continue;
4048    QualType Ty = P.second.Original->getType().getNonReferenceType();
4049    if (Ty.isDestructedType())
4050      return true;
4051  }
4052  return false;
4053}
4054
namespace {
/// Loop generator for OpenMP iterator expression.
/// The constructor emits the loop headers (counter init, bound check, and
/// iterator update) for every iterator in the expression; the destructor
/// emits the counter increments, back-branches, and exit blocks in reverse
/// order. Code emitted while the scope is alive therefore lands in the
/// innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation (loop header) and exit destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound before privatizing the iterator variables.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      // Private storage for the iterator variable itself.
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      // Private storage for the helper counter driving the loop.
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed or unsigned comparison depending on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4133
4134static std::pair<llvm::Value *, llvm::Value *>
4135getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4136  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4137  llvm::Value *Addr;
4138  if (OASE) {
4139    const Expr *Base = OASE->getBase();
4140    Addr = CGF.EmitScalarExpr(Base);
4141  } else {
4142    Addr = CGF.EmitLValue(E).getPointer(CGF);
4143  }
4144  llvm::Value *SizeVal;
4145  QualType Ty = E->getType();
4146  if (OASE) {
4147    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4148    for (const Expr *SE : OASE->getDimensions()) {
4149      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4150      Sz = CGF.EmitScalarConversion(
4151          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4152      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4153    }
4154  } else if (const auto *ASE =
4155                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4156    LValue UpAddrLVal =
4157        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4158    llvm::Value *UpAddr =
4159        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4160    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4161    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4162    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4163  } else {
4164    SizeVal = CGF.getTypeSize(Ty);
4165  }
4166  return std::make_pair(Addr, SizeVal);
4167}
4168
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet.
/// The layout is { intptr_t base_addr; size_t len; uint32_t flags; }, matching
/// the fields stored by the affinity-clause codegen below.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4183
/// Allocates and initializes a kmp_task_t object for the task-creating
/// directive \p D: aggregates the private copies, builds (or reuses) the
/// kmp_task_t record types, emits the proxy entry / dup / destructor
/// functions, calls the runtime task-allocation entry point, and fills in
/// shareds, privates, detach event, affinity info, destructors and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression's variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Most strictly aligned privates first; stable so equally aligned entries
  // keep their clause order.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops use a kmp_task_t
  // record cached separately (SavedKmpTaskloopTQTy) from plain tasks.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime condition (pointer set) or a
  // compile-time constant (int part of the PointerIntPair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // 'nowait' on a target-style task: use the target allocator, which takes
    // an extra device ID argument.
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the element count is the product of the
        // iterators' upper bounds, known only at run time.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    // A VLA is needed when any affinity item is iterator-driven; otherwise
    // the total number of affinities is a compile-time constant.
    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-driven items need a runtime position counter, seeded with the
    // number of constant items already emitted.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a 'dup' function to re-initialize privates for each
    // generated task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  // Data1/Data2 index the kmp_cmplrdata_t fields of kmp_task_t (enumerators
  // defined elsewhere in this file).
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4573
namespace {
/// Dependence kind for RTL.
/// These values are stored into the 'flags' field of kmp_depend_info (see
/// emitDependData below).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record, in declaration order:
/// { base_addr, len, flags }.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4584
4585/// Translates internal dependency kind into the runtime kind.
4586static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4587  RTLDependenceKindTy DepKind;
4588  switch (K) {
4589  case OMPC_DEPEND_in:
4590    DepKind = DepIn;
4591    break;
4592  // Out and InOut dependencies must use the same code.
4593  case OMPC_DEPEND_out:
4594  case OMPC_DEPEND_inout:
4595    DepKind = DepInOut;
4596    break;
4597  case OMPC_DEPEND_mutexinoutset:
4598    DepKind = DepMutexInOutSet;
4599    break;
4600  case OMPC_DEPEND_source:
4601  case OMPC_DEPEND_sink:
4602  case OMPC_DEPEND_depobj:
4603  case OMPC_DEPEND_unknown:
4604    llvm_unreachable("Unknown task dependence type");
4605  }
4606  return DepKind;
4607}
4608
4609/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4610static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4611                           QualType &FlagsTy) {
4612  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4613  if (KmpDependInfoTy.isNull()) {
4614    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4615    KmpDependInfoRD->startDefinition();
4616    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4617    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4618    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4619    KmpDependInfoRD->completeDefinition();
4620    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4621  }
4622}
4623
/// Returns the number of dependencies stored in a depobj together with an
/// lvalue for the first kmp_depend_info element of its dependency array. The
/// element count is read from the 'base_addr' field of the sentinel element
/// located immediately BEFORE the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* pointing at the dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the sentinel that stores the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4652
/// Emits one kmp_depend_info entry per expression in \p Data.DepExprs into
/// \p DependenciesArray, starting at position \p Pos. \p Pos is either a
/// compile-time counter (unsigned*) or a runtime counter (LValue*) — the
/// latter is needed when earlier entries were emitted under an iterator and
/// the current index is only known at run time. The counter is advanced past
/// the emitted entries. If the dependency group carries an iterator modifier,
/// the whole emission is wrapped in the iterator loop nest.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Open the iterator loop nest, if any; its destructor closes the loops at
  // the end of this function.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Constant index: address the slot directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load it from the position counter.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position counter (compile-time or runtime form).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4711
/// For a 'depobj'-kind dependency group, returns one value per depobj
/// expression: the number of dependencies that depobj holds (read from the
/// sentinel element stored at index -1 of its dependency array). If the group
/// has an iterator modifier, each per-expression count is accumulated across
/// all iterations into a temporary and the final totals are loaded after the
/// iterator loops close.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Scope block so the iterator loops are closed before the totals are read.
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // The depobj variable holds a void* pointing at its dependency array.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to the sentinel that stores the element count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary; under an iterator this
      // sums the counts over all iterations.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated totals outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4769
4770static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4771                               LValue PosLVal,
4772                               const OMPTaskDataTy::DependData &Data,
4773                               Address DependenciesArray) {
4774  assert(Data.DepKind == OMPC_DEPEND_depobj &&
4775         "Expected depobj dependecy kind.");
4776  ASTContext &C = CGF.getContext();
4777  QualType FlagsTy;
4778  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4779  RecordDecl *KmpDependInfoRD =
4780      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4781  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4782  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4783  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4784  {
4785    OMPIteratorGeneratorScope IteratorScope(
4786        CGF, cast_or_null<OMPIteratorExpr>(
4787                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4788                                   : nullptr));
4789    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4790      const Expr *E = Data.DepExprs[I];
4791      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4792      LValue Base = CGF.EmitLoadOfPointerLValue(
4793          DepobjLVal.getAddress(CGF),
4794          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4795      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4796          Base.getAddress(CGF), KmpDependInfoPtrT);
4797      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4798                                Base.getTBAAInfo());
4799
4800      // Get number of elements in a single depobj.
4801      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4802          Addr.getPointer(),
4803          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4804      LValue NumDepsBase = CGF.MakeAddrLValue(
4805          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
4806          Base.getBaseInfo(), Base.getTBAAInfo());
4807      // NumDeps = deps[i].base_addr;
4808      LValue BaseAddrLVal = CGF.EmitLValueForField(
4809          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4810      llvm::Value *NumDeps =
4811          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4812
4813      // memcopy dependency data.
4814      llvm::Value *Size = CGF.Builder.CreateNUWMul(
4815          ElSize,
4816          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4817      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4818      Address DepAddr =
4819          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
4820                  DependenciesArray.getAlignment());
4821      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4822
4823      // Increase pos.
4824      // pos += size;
4825      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4826      CGF.EmitStoreOfScalar(Add, PosLVal);
4827    }
4828  }
4829}
4830
4831std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4832    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4833    SourceLocation Loc) {
4834  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4835        return D.DepExprs.empty();
4836      }))
4837    return std::make_pair(nullptr, Address::invalid());
4838  // Process list of dependencies.
4839  ASTContext &C = CGM.getContext();
4840  Address DependenciesArray = Address::invalid();
4841  llvm::Value *NumOfElements = nullptr;
4842  unsigned NumDependencies = std::accumulate(
4843      Dependencies.begin(), Dependencies.end(), 0,
4844      [](unsigned V, const OMPTaskDataTy::DependData &D) {
4845        return D.DepKind == OMPC_DEPEND_depobj
4846                   ? V
4847                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4848      });
4849  QualType FlagsTy;
4850  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4851  bool HasDepobjDeps = false;
4852  bool HasRegularWithIterators = false;
4853  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4854  llvm::Value *NumOfRegularWithIterators =
4855      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4856  // Calculate number of depobj dependecies and regular deps with the iterators.
4857  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4858    if (D.DepKind == OMPC_DEPEND_depobj) {
4859      SmallVector<llvm::Value *, 4> Sizes =
4860          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4861      for (llvm::Value *Size : Sizes) {
4862        NumOfDepobjElements =
4863            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4864      }
4865      HasDepobjDeps = true;
4866      continue;
4867    }
4868    // Include number of iterations, if any.
4869    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4870      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4871        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4872        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4873        NumOfRegularWithIterators =
4874            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4875      }
4876      HasRegularWithIterators = true;
4877      continue;
4878    }
4879  }
4880
4881  QualType KmpDependInfoArrayTy;
4882  if (HasDepobjDeps || HasRegularWithIterators) {
4883    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4884                                           /*isSigned=*/false);
4885    if (HasDepobjDeps) {
4886      NumOfElements =
4887          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4888    }
4889    if (HasRegularWithIterators) {
4890      NumOfElements =
4891          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4892    }
4893    OpaqueValueExpr OVE(Loc,
4894                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4895                        VK_RValue);
4896    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4897                                                  RValue::get(NumOfElements));
4898    KmpDependInfoArrayTy =
4899        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4900                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4901    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4902    // Properly emit variable-sized array.
4903    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4904                                         ImplicitParamDecl::Other);
4905    CGF.EmitVarDecl(*PD);
4906    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4907    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4908                                              /*isSigned=*/false);
4909  } else {
4910    KmpDependInfoArrayTy = C.getConstantArrayType(
4911        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4912        ArrayType::Normal, /*IndexTypeQuals=*/0);
4913    DependenciesArray =
4914        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4915    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4916    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4917                                           /*isSigned=*/false);
4918  }
4919  unsigned Pos = 0;
4920  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4921    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4922        Dependencies[I].IteratorExpr)
4923      continue;
4924    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4925                   DependenciesArray);
4926  }
4927  // Copy regular dependecies with iterators.
4928  LValue PosLVal = CGF.MakeAddrLValue(
4929      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4930  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4931  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4932    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4933        !Dependencies[I].IteratorExpr)
4934      continue;
4935    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4936                   DependenciesArray);
4937  }
4938  // Copy final depobj arrays without iterators.
4939  if (HasDepobjDeps) {
4940    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4942        continue;
4943      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4944                         DependenciesArray);
4945    }
4946  }
4947  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4948      DependenciesArray, CGF.VoidPtrTy);
4949  return std::make_pair(NumOfElements, DependenciesArray);
4950}
4951
/// Emits the dependency array for a standalone 'omp depobj' construct.
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// element whose base_addr field stores the number of dependency entries
/// (needed by 'depobj(x) update(...)' and 'destroy'). Returns the address of
/// the first real entry (one past the hidden size element), cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator-driven list: the element count is the runtime product of all
    // iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the hidden size element.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically-sized list: allocation size is known at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill in the entries starting at index 1; iterator-driven lists need the
  // running position in memory, static lists can use a plain counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the hidden size element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5034
5035void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5036                                        SourceLocation Loc) {
5037  ASTContext &C = CGM.getContext();
5038  QualType FlagsTy;
5039  getDependTypes(C, KmpDependInfoTy, FlagsTy);
5040  LValue Base = CGF.EmitLoadOfPointerLValue(
5041      DepobjLVal.getAddress(CGF),
5042      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5043  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5044  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5046  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5047      Addr.getPointer(),
5048      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5049  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5050                                                               CGF.VoidPtrTy);
5051  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5052  // Use default allocator.
5053  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5054  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5055
5056  // _kmpc_free(gtid, addr, nullptr);
5057  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5058                                CGM.getModule(), OMPRTL___kmpc_free),
5059                            Args);
5060}
5061
/// Implements 'depobj(x) update(NewDepKind)': iterates over every
/// kmp_depend_info entry stored in the depobj and rewrites its flags field to
/// the new dependency kind. Emits a hand-built do-while loop over the entries.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Fetch the element count and the address of the first entry.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop. NOTE(review): the loop body
  // is entered unconditionally, so this presumably relies on a depobj always
  // holding at least one entry.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: entry value from EntryBB, advanced
  // value from the loop's own back-edge (added below).
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5107
/// Emits code for an OpenMP 'task' directive: allocates/initializes the task
/// via emitTaskInit, emits the dependency array (if any), then either enqueues
/// the task (__kmpc_omp_task[_with_deps]) or, when the 'if' clause evaluates
/// to false, executes it immediately and serially inside a
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 bracket.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: enqueue the task with the runtime (the common case, and
  // the only case when there is no 'if' clause).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start from part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch ('if' clause false): wait for the dependences, then run the
  // task body inline under begin_if0/complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5225
/// Emits code for an OpenMP 'taskloop' directive: initializes the task,
/// stores the loop bounds/stride and reduction data into the task record, and
/// calls __kmpc_taskloop. Unlike emitTaskCall, the 'if' clause is passed to
/// the runtime as an argument rather than producing two code paths.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No 'if' clause: behave as if the condition is true.
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record from the loop's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // And for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling modes accepted by __kmpc_taskloop's 'sched' parameter.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5311
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Emits a do-while loop over the elements; LHSVar/RHSVar are privatized to
/// the current element on each iteration before invoking the generator.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Optional expression forwarded to \p RedOpGen (used for atomic
/// reductions).
/// \param EExpr Optional expression forwarded to \p RedOpGen.
/// \param UpExpr Optional update expression forwarded to \p RedOpGen.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs over the current source/destination element pointers; the back-edge
  // incoming values are added after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: privatize LHSVar/RHSVar to the current elements so the
  // generator's expressions reference them.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5391
5392/// Emit reduction combiner. If the combiner is a simple expression emit it as
5393/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5394/// UDR combiner function.
5395static void emitReductionCombiner(CodeGenFunction &CGF,
5396                                  const Expr *ReductionOp) {
5397  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5398    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5399      if (const auto *DRE =
5400              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5401        if (const auto *DRD =
5402                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5403          std::pair<llvm::Function *, llvm::Function *> Reduction =
5404              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5405          RValue Func = RValue::get(Reduction.first);
5406          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5407          CGF.EmitIgnoredExpr(ReductionOp);
5408          return;
5409        }
5410  CGF.EmitIgnoredExpr(ReductionOp);
5411}
5412
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the function is only referenced by pointer from the
  // __kmpc_reduce{_nowait} call emitted in this translation unit.
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS reduction variable to the matching element of the
  // argument arrays. Idx tracks the array slot and can run ahead of I:
  // variably-modified items occupy an extra slot that carries their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA's opaque size expression to the value loaded from the
      // extra slot so EmitVariablyModifiedType sees the runtime size.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  // With the variables remapped, emit one combiner per reduction item.
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5504
5505void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5506                                                  const Expr *ReductionOp,
5507                                                  const Expr *PrivateRef,
5508                                                  const DeclRefExpr *LHS,
5509                                                  const DeclRefExpr *RHS) {
5510  if (PrivateRef->getType()->isArrayType()) {
5511    // Emit reduction for array section.
5512    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5513    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5514    EmitOMPAggregateReduction(
5515        CGF, PrivateRef->getType(), LHSVar, RHSVar,
5516        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5517          emitReductionCombiner(CGF, ReductionOp);
5518        });
5519  } else {
5520    // Emit reduction for array subscript or single variable.
5521    emitReductionCombiner(CGF, ReductionOp);
5522  }
5523}
5524
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit a plain combiner per item.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Variably-modified items need one extra slot each to carry their size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  // Idx can run ahead of I: VLA items consume an extra slot for the size.
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot as an inttoptr value;
      // reduce_func converts it back with ptrtoint.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // The master thread (return value 1) combines directly; the CommonActionTy
  // below makes sure the end-reduce runtime call is emitted on region exit.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Try to decompose the reduction op as "x = <update expr>" so it can
      // be emitted as a simple atomic update instead of a critical region.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value of x into a
                // temporary mapped to VD and re-emit the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5831
5832/// Generates unique name for artificial threadprivate variables.
5833/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5834static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5835                                      const Expr *Ref) {
5836  SmallString<256> Buffer;
5837  llvm::raw_svector_ostream Out(Buffer);
5838  const clang::DeclRefExpr *DE;
5839  const VarDecl *D = ::getBaseDecl(Ref, DE);
5840  if (!D)
5841    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5842  D = D->getCanonicalDecl();
5843  std::string Name = CGM.getOpenMPRuntime().getName(
5844      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5845  Out << Prefix << Name << "_"
5846      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5847  return std::string(Out.str());
5848}
5849
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // %arg: pointer to the private copy to initialize.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  // %orig: pointer to the original reduction item.
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Original item not needed: pass a null void* lvalue.
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5918
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS variables referenced by ReductionOp; they get remapped to
  // the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // %arg0: in/out item (receives the combined value).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  // %arg1: in item (combined into %arg0).
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5996
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor needed - skip emitting a finalizer altogether.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // %arg: pointer to the private copy to destroy.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6045
6046llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6047    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6048    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6049  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6050    return nullptr;
6051
6052  // Build typedef struct:
6053  // kmp_taskred_input {
6054  //   void *reduce_shar; // shared reduction item
6055  //   void *reduce_orig; // original reduction item used for initialization
6056  //   size_t reduce_size; // size of data item
6057  //   void *reduce_init; // data initialization routine
6058  //   void *reduce_fini; // data finalization routine
6059  //   void *reduce_comb; // data combiner routine
6060  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
6061  // } kmp_taskred_input_t;
6062  ASTContext &C = CGM.getContext();
6063  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6064  RD->startDefinition();
6065  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6066  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6067  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6068  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6069  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6070  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6071  const FieldDecl *FlagsFD = addFieldToRecordDecl(
6072      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6073  RD->completeDefinition();
6074  QualType RDType = C.getRecordType(RD);
6075  unsigned Size = Data.ReductionVars.size();
6076  llvm::APInt ArraySize(/*numBits=*/64, Size);
6077  QualType ArrayRDType = C.getConstantArrayType(
6078      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6079  // kmp_task_red_input_t .rd_input.[Size];
6080  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6081  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6082                       Data.ReductionCopies, Data.ReductionOps);
6083  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6084    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6085    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6086                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6087    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6088        TaskRedInput.getPointer(), Idxs,
6089        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6090        ".rd_input.gep.");
6091    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6092    // ElemLVal.reduce_shar = &Shareds[Cnt];
6093    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6094    RCG.emitSharedOrigLValue(CGF, Cnt);
6095    llvm::Value *CastedShared =
6096        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6097    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6098    // ElemLVal.reduce_orig = &Origs[Cnt];
6099    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6100    llvm::Value *CastedOrig =
6101        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6102    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6103    RCG.emitAggregateType(CGF, Cnt);
6104    llvm::Value *SizeValInChars;
6105    llvm::Value *SizeVal;
6106    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6107    // We use delayed creation/initialization for VLAs and array sections. It is
6108    // required because runtime does not provide the way to pass the sizes of
6109    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
6110    // threadprivate global variables are used to store these values and use
6111    // them in the functions.
6112    bool DelayedCreation = !!SizeVal;
6113    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6114                                               /*isSigned=*/false);
6115    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6116    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6117    // ElemLVal.reduce_init = init;
6118    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6119    llvm::Value *InitAddr =
6120        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6121    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6122    // ElemLVal.reduce_fini = fini;
6123    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6124    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6125    llvm::Value *FiniAddr = Fini
6126                                ? CGF.EmitCastToVoidPtr(Fini)
6127                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6128    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6129    // ElemLVal.reduce_comb = comb;
6130    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6131    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6132        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6133        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6134    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6135    // ElemLVal.flags = 0;
6136    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6137    if (DelayedCreation) {
6138      CGF.EmitStoreOfScalar(
6139          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6140          FlagsLVal);
6141    } else
6142      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6143                                 FlagsLVal.getType());
6144  }
6145  if (Data.IsReductionWithTaskMod) {
6146    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6147    // is_ws, int num, void *data);
6148    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6149    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6150                                                  CGM.IntTy, /*isSigned=*/true);
6151    llvm::Value *Args[] = {
6152        IdentTLoc, GTid,
6153        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6154                               /*isSigned=*/true),
6155        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6156        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6157            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6158    return CGF.EmitRuntimeCall(
6159        OMPBuilder.getOrCreateRuntimeFunction(
6160            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6161        Args);
6162  }
6163  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6164  llvm::Value *Args[] = {
6165      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6166                                /*isSigned=*/true),
6167      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6168      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6169                                                      CGM.VoidPtrTy)};
6170  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6171                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6172                             Args);
6173}
6174
6175void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6176                                            SourceLocation Loc,
6177                                            bool IsWorksharingReduction) {
6178  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6179  // is_ws, int num, void *data);
6180  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6181  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6182                                                CGM.IntTy, /*isSigned=*/true);
6183  llvm::Value *Args[] = {IdentTLoc, GTid,
6184                         llvm::ConstantInt::get(CGM.IntTy,
6185                                                IsWorksharingReduction ? 1 : 0,
6186                                                /*isSigned=*/true)};
6187  (void)CGF.EmitRuntimeCall(
6188      OMPBuilder.getOrCreateRuntimeFunction(
6189          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6190      Args);
6191}
6192
6193void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6194                                              SourceLocation Loc,
6195                                              ReductionCodeGen &RCG,
6196                                              unsigned N) {
6197  auto Sizes = RCG.getSizes(N);
6198  // Emit threadprivate global variable if the type is non-constant
6199  // (Sizes.second = nullptr).
6200  if (Sizes.second) {
6201    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6202                                                     /*isSigned=*/false);
6203    Address SizeAddr = getAddrOfArtificialThreadPrivate(
6204        CGF, CGM.getContext().getSizeType(),
6205        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6206    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6207  }
6208}
6209
6210Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6211                                              SourceLocation Loc,
6212                                              llvm::Value *ReductionsPtr,
6213                                              LValue SharedLVal) {
6214  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6215  // *d);
6216  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6217                                                   CGM.IntTy,
6218                                                   /*isSigned=*/true),
6219                         ReductionsPtr,
6220                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6221                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6222  return Address(
6223      CGF.EmitRuntimeCall(
6224          OMPBuilder.getOrCreateRuntimeFunction(
6225              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6226          Args),
6227      SharedLVal.getAlignment());
6228}
6229
6230void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6231                                       SourceLocation Loc) {
6232  if (!CGF.HaveInsertPoint())
6233    return;
6234
6235  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6236    OMPBuilder.createTaskwait(CGF.Builder);
6237  } else {
6238    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6239    // global_tid);
6240    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6241    // Ignore return result until untied tasks are supported.
6242    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6243                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6244                        Args);
6245  }
6246
6247  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6248    Region->emitUntiedSwitch(CGF);
6249}
6250
6251void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6252                                           OpenMPDirectiveKind InnerKind,
6253                                           const RegionCodeGenTy &CodeGen,
6254                                           bool HasCancel) {
6255  if (!CGF.HaveInsertPoint())
6256    return;
6257  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6258                                 InnerKind != OMPD_critical &&
6259                                     InnerKind != OMPD_master &&
6260                                     InnerKind != OMPD_masked);
6261  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6262}
6263
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument to the runtime
/// cancellation entry points (__kmpc_cancel / __kmpc_cancellationpoint);
/// the numeric values are part of the runtime ABI and must not change.
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation requested.
  CancelParallel = 1,  // Cancel a 'parallel' region.
  CancelLoop = 2,      // Cancel a worksharing loop ('for').
  CancelSections = 3,  // Cancel a 'sections' region.
  CancelTaskgroup = 4  // Cancel a 'taskgroup' region.
};
} // anonymous namespace
6273
6274static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6275  RTCancelKind CancelKind = CancelNoreq;
6276  if (CancelRegion == OMPD_parallel)
6277    CancelKind = CancelParallel;
6278  else if (CancelRegion == OMPD_for)
6279    CancelKind = CancelLoop;
6280  else if (CancelRegion == OMPD_sections)
6281    CancelKind = CancelSections;
6282  else {
6283    assert(CancelRegion == OMPD_taskgroup);
6284    CancelKind = CancelTaskgroup;
6285  }
6286  return CancelKind;
6287}
6288
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  // Nothing to emit if the insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // A non-zero result means cancellation is pending; branch out of the
      // construct through any active cleanups:
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6325
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  // Nothing to emit if the insertion point is unreachable.
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted when the cancel actually runs (always, or when the if
    // clause condition is true). Note the lambda takes its own CGF: with an
    // if clause the then/else branches may be emitted in a nested context.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // A non-zero result means cancellation was activated; branch out of
      // the construct through any active cleanups:
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Guard the cancel with the if clause condition; the else branch is a
      // no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      // No if clause: emit the cancel unconditionally.
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6368
6369namespace {
6370/// Cleanup action for uses_allocators support.
6371class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6372  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6373
6374public:
6375  OMPUsesAllocatorsActionTy(
6376      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6377      : Allocators(Allocators) {}
6378  void Enter(CodeGenFunction &CGF) override {
6379    if (!CGF.HaveInsertPoint())
6380      return;
6381    for (const auto &AllocatorData : Allocators) {
6382      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6383          CGF, AllocatorData.first, AllocatorData.second);
6384    }
6385  }
6386  void Exit(CodeGenFunction &CGF) override {
6387    if (!CGF.HaveInsertPoint())
6388      return;
6389    for (const auto &AllocatorData : Allocators) {
6390      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6391                                                        AllocatorData.first);
6392    }
6393  }
6394};
6395} // namespace
6396
6397void CGOpenMPRuntime::emitTargetOutlinedFunction(
6398    const OMPExecutableDirective &D, StringRef ParentName,
6399    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6400    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6401  assert(!ParentName.empty() && "Invalid target region parent name!");
6402  HasEmittedTargetRegion = true;
6403  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6404  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6405    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6406      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6407      if (!D.AllocatorTraits)
6408        continue;
6409      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6410    }
6411  }
6412  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6413  CodeGen.setAction(UsesAllocatorAction);
6414  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6415                                   IsOffloadEntry, CodeGen);
6416}
6417
// Initialize a user-defined allocator from a uses_allocators clause: call
// void *__kmpc_init_allocator(int gtid, void *memspace, int ntraits,
// void *traits) and store the resulting handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits = number of elements of the traits constant array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void* and load it to pass to the
  // runtime.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the allocator variable itself, then convert the
  // runtime's void* result to the variable's type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6452
6453void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6454                                             const Expr *Allocator) {
6455  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6456  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6457  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6458  llvm::Value *AllocatorVal =
6459      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6460  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6461                                          CGF.getContext().VoidPtrTy,
6462                                          Allocator->getExprLoc());
6463  (void)CGF.EmitRuntimeCall(
6464      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6465                                            OMPRTL___kmpc_destroy_allocator),
6466      {ThreadId, AllocatorVal});
6467}
6468
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the target region body into a function with the computed name,
  // using a fresh CodeGenFunction so the current one is not disturbed.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    // AMDGCN kernels must use the kernel calling convention.
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // On the host, a one-byte constant global serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6537
6538/// Checks if the expression is constant or does not have non-trivial function
6539/// calls.
6540static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6541  // We can skip constant expressions.
6542  // We can skip expressions with trivial calls or simple expressions.
6543  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6544          !E->hasNonTrivialCall(Ctx)) &&
6545         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6546}
6547
// Return the single "interesting" statement nested (possibly through layers
// of compound statements and ignorable statements) inside \p Body, or nullptr
// if there is more than one such statement.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, each time looking for exactly
  // one non-ignorable child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant, no non-trivial calls, no side effects)
      // can be ignored.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable if every declaration in it is ignorable:
        // non-variable declarations, globals, and unused variables.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6589
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look through the captured body for a single
    // nested directive and derive the number of teams from it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested teams' num_teams expression in the context
          // of the target's captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams without num_teams clause: emit 0 as the default.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd region runs as a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Body shape could not be analyzed; the caller must handle this case.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: take num_teams directly from the directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    // No num_teams clause: emit 0 as the default.
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // These have no associated teams construct: exactly one team.
    return Bld.getInt32(1);
  // The remaining kinds are not target-based executable directives; reaching
  // them here is a bug (caught by the assert above and the unreachable
  // below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6723
/// Compute the number of threads implied by a parallel region nested directly
/// inside the captured statement \p CS, clamped by \p DefaultThreadLimitVal
/// when that is non-null. Returns nullptr only when the single child is an
/// executable directive that is neither parallel nor simd and no default is
/// given.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Select the if clause that applies to 'parallel' (either no name
        // modifier or the 'parallel' modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Compile-time-false condition: region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Runtime condition: emit any pre-init declarations first, then
            // the condition itself.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Captured-no-init variables: allocate without initializing.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // Captured-no-init variables: allocate without initializing.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the default limit: min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the default limit, or 0 if none.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No analyzable nested directive: use the default limit, or 0 if none.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6815
6816/// Emit the number of threads for a target directive.  Inspect the
6817/// thread_limit clause associated with a teams construct combined or closely
6818/// nested with the target directive.
6819///
6820/// Emit the num_threads clause for directives such as 'target parallel' that
6821/// have no associated teams construct.
6822///
6823/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  // Upper bound coming from a thread_limit clause, if one is found; stays
  // null until then.
  llvm::Value *ThreadLimitVal = nullptr;
  // Value coming from a num_threads clause, if one is found.
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' may have teams/parallel regions nested inside. First
    // let the captured region itself determine the count, then inspect the
    // single nested directive (if any) for thread_limit and deeper regions.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the thread_limit expression in the context of the captured
        // statement so captured variables resolve correctly.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Materialize any helper variables the clause pre-init statement
        // declares before evaluating the clause expression itself.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // No-init captures get storage and cleanups but no initializer.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not also a distribute, descend one more
      // level to find the directive that actually carries the loop.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to the thread_limit value, or 0 when nothing constrains the
    // thread count.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit may appear directly on the combined directive.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // A nested 'distribute' may itself wrap a region whose clauses determine
    // the thread count.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Only an 'if' clause without a name modifier, or one naming 'parallel',
      // applies to the parallel region here.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds at compile time: a false condition means the
          // region runs with exactly one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Final value is min(num_threads, thread_limit) when both are present
      // (unsigned compare), otherwise just num_threads.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // Apply a runtime-evaluated 'if' condition: false selects one thread.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // None of the remaining directive kinds is a target-based executable
  // directive, so reaching the switch with one of them is a bug (caught by
  // llvm_unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7041
7042namespace {
7043LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7044
7045// Utility to handle information from clauses associated with a given
7046// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7047// It provides a convenient interface to obtain the information and generate
7048// code for that information.
7049class MappableExprsHandler {
7050public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these bit values form the ABI contract with the offloading
  /// runtime (see the XLC-compatibility reservation below) — presumably they
  /// must stay in sync with the runtime's map-type flags; verify before
  /// changing any value.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7098
7099  /// Get the offset of the OMP_MAP_MEMBER_OF field.
7100  static unsigned getFlagMemberOffset() {
7101    unsigned Offset = 0;
7102    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7103         Remain = Remain >> 1)
7104      Offset++;
7105    return Offset;
7106  }
7107
7108  /// Class that holds debugging information for a data mapping to be passed to
7109  /// the runtime library.
7110  class MappingExprInfo {
7111    /// The variable declaration used for the data mapping.
7112    const ValueDecl *MapDecl = nullptr;
7113    /// The original expression used in the map clause, or null if there is
7114    /// none.
7115    const Expr *MapExpr = nullptr;
7116
7117  public:
7118    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7119        : MapDecl(MapDecl), MapExpr(MapExpr) {}
7120
7121    const ValueDecl *getMapDecl() const { return MapDecl; }
7122    const Expr *getMapExpr() const { return MapExpr; }
7123  };
7124
7125  /// Class that associates information with a base pointer to be passed to the
7126  /// runtime library.
7127  class BasePointerInfo {
7128    /// The base pointer.
7129    llvm::Value *Ptr = nullptr;
7130    /// The base declaration that refers to this device pointer, or null if
7131    /// there is none.
7132    const ValueDecl *DevPtrDecl = nullptr;
7133
7134  public:
7135    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7136        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7137    llvm::Value *operator*() const { return Ptr; }
7138    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7139    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7140  };
7141
  // Parallel arrays used to collect the per-capture mapping information that
  // is ultimately passed to the offloading runtime.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7149
7150  /// This structure contains combined information generated for mappable
7151  /// clauses, including base pointers, pointers, sizes, map types, user-defined
7152  /// mappers, and non-contiguous information.
7153  struct MapCombinedInfoTy {
7154    struct StructNonContiguousInfo {
7155      bool IsNonContiguous = false;
7156      MapDimArrayTy Dims;
7157      MapNonContiguousArrayTy Offsets;
7158      MapNonContiguousArrayTy Counts;
7159      MapNonContiguousArrayTy Strides;
7160    };
7161    MapExprsArrayTy Exprs;
7162    MapBaseValuesArrayTy BasePointers;
7163    MapValuesArrayTy Pointers;
7164    MapValuesArrayTy Sizes;
7165    MapFlagsArrayTy Types;
7166    MapMappersArrayTy Mappers;
7167    StructNonContiguousInfo NonContigInfo;
7168
7169    /// Append arrays in \a CurInfo.
7170    void append(MapCombinedInfoTy &CurInfo) {
7171      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7172      BasePointers.append(CurInfo.BasePointers.begin(),
7173                          CurInfo.BasePointers.end());
7174      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7175      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7176      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7177      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7178      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7179                                 CurInfo.NonContigInfo.Dims.end());
7180      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7181                                    CurInfo.NonContigInfo.Offsets.end());
7182      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7183                                   CurInfo.NonContigInfo.Counts.end());
7184      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7185                                    CurInfo.NonContigInfo.Strides.end());
7186    }
7187  };
7188
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping information gathered ahead of processing the struct itself.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: (field index, address of the element).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: (field index, address of the element).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct — presumably the address of the whole
    // object the mapped fields belong to; TODO(review): confirm with users.
    Address Base = Address::invalid();
    // NOTE(review): looks like the lower-bound address of the mapped range;
    // confirm against the code that fills this struct.
    Address LB = Address::invalid();
    // True when one of the mapped elements is an array section.
    bool IsArraySection = false;
    // True when the complete record is mapped (not just a field range).
    bool HasCompleteRecord = false;
  };
7204
7205private:
  /// Holds the information extracted for one component list of a mappable
  /// clause: the components themselves, the map type and modifiers, and
  /// bookkeeping flags describing how the entry must be emitted.
  /// (The previous comment here — about how a device pointer is returned —
  /// described a different entity.)
  struct MapInfo {
    /// Expression components (base declaration through final element).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// to / from / tofrom / alloc / release / delete, or unknown.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (e.g. always, close, present).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (use_device_ptr / use_device_addr).
    bool ReturnDevicePointer = false;
    /// True when the mapping was generated implicitly rather than written
    /// by the user.
    bool IsImplicit = false;
    /// User-defined mapper attached to this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original variable reference expression, if available.
    const Expr *VarRef = nullptr;
    /// True when the entry stems from a use_device_addr-style address.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7232
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression that identifies the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause names.
    const ValueDecl *VD = nullptr;
    /// True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7245
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7266
  /// Compute the size in bytes to be mapped for expression \a E. Array
  /// shaping expressions and array sections get a computed size; anything
  /// else uses the size of the (canonical) expression type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all shaping dimensions, computed
      // in size_t arithmetic with no-unsigned-wrap multiplies.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee type for pointer bases, and from
      // the element type for array bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        // Size = length * element size, in size_t arithmetic.
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against lb beyond the array: clamp the result to zero instead of
      // producing a wrapped-around (huge) unsigned size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7341
7342  /// Return the corresponding bits for a given map clause modifier. Add
7343  /// a flag marking the map as a pointer if requested. Add a flag marking the
7344  /// map as the first one of a series of maps that relate to the same map
7345  /// expression.
7346  OpenMPOffloadMappingFlags getMapTypeBits(
7347      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7348      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7349      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7350    OpenMPOffloadMappingFlags Bits =
7351        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7352    switch (MapType) {
7353    case OMPC_MAP_alloc:
7354    case OMPC_MAP_release:
7355      // alloc and release is the default behavior in the runtime library,  i.e.
7356      // if we don't pass any bits alloc/release that is what the runtime is
7357      // going to do. Therefore, we don't need to signal anything for these two
7358      // type modifiers.
7359      break;
7360    case OMPC_MAP_to:
7361      Bits |= OMP_MAP_TO;
7362      break;
7363    case OMPC_MAP_from:
7364      Bits |= OMP_MAP_FROM;
7365      break;
7366    case OMPC_MAP_tofrom:
7367      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7368      break;
7369    case OMPC_MAP_delete:
7370      Bits |= OMP_MAP_DELETE;
7371      break;
7372    case OMPC_MAP_unknown:
7373      llvm_unreachable("Unexpected map type!");
7374    }
7375    if (AddPtrFlag)
7376      Bits |= OMP_MAP_PTR_AND_OBJ;
7377    if (AddIsTargetParamFlag)
7378      Bits |= OMP_MAP_TARGET_PARAM;
7379    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7380        != MapModifiers.end())
7381      Bits |= OMP_MAP_ALWAYS;
7382    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7383        != MapModifiers.end())
7384      Bits |= OMP_MAP_CLOSE;
7385    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7386            MapModifiers.end() ||
7387        llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7388            MotionModifiers.end())
7389      Bits |= OMP_MAP_PRESENT;
7390    if (IsNonContiguous)
7391      Bits |= OMP_MAP_NON_CONTIG;
7392    return Bits;
7393  }
7394
7395  /// Return true if the provided expression is a final array section. A
7396  /// final array section, is one whose length can't be proved to be one.
7397  bool isFinalArraySectionExpression(const Expr *E) const {
7398    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7399
7400    // It is not an array section and therefore not a unity-size one.
7401    if (!OASE)
7402      return false;
7403
7404    // An array section with no colon always refer to a single element.
7405    if (OASE->getColonLocFirst().isInvalid())
7406      return false;
7407
7408    const Expr *Length = OASE->getLength();
7409
7410    // If we don't have a length we have to check if the array has size 1
7411    // for this dimension. Also, we should always expect a length if the
7412    // base type is pointer.
7413    if (!Length) {
7414      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7415                             OASE->getBase()->IgnoreParenImpCasts())
7416                             .getCanonicalType();
7417      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7418        return ATy->getSize().getSExtValue() != 1;
7419      // If we don't have a constant dimension length, we have to consider
7420      // the current section as having any size, so it is not necessarily
7421      // unitary. If it happen to be unity size, that's user fault.
7422      return true;
7423    }
7424
7425    // Check if the length evaluates to 1.
7426    Expr::EvalResult Result;
7427    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7428      return true; // Can have more that size 1.
7429
7430    llvm::APSInt ConstLength = Result.Val.getInt();
7431    return ConstLength.getSExtValue() != 1;
7432  }
7433
7434  /// Generate the base pointers, section pointers, sizes, map type bits, and
7435  /// user-defined mappers (all included in \a CombinedInfo) for the provided
7436  /// map type, map or motion modifiers, and expression components.
7437  /// \a IsFirstComponent should be set to true if the provided set of
7438  /// components is the first associated with a capture.
7439  void generateInfoForComponentList(
7440      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7441      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7442      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7443      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7444      bool IsFirstComponentList, bool IsImplicit,
7445      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7446      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7447      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7448          OverlappedElements = llvm::None) const {
7449    // The following summarizes what has to be generated for each map and the
7450    // types below. The generated information is expressed in this order:
7451    // base pointer, section pointer, size, flags
7452    // (to add to the ones that come from the map type and modifier).
7453    //
7454    // double d;
7455    // int i[100];
7456    // float *p;
7457    //
7458    // struct S1 {
7459    //   int i;
7460    //   float f[50];
7461    // }
7462    // struct S2 {
7463    //   int i;
7464    //   float f[50];
7465    //   S1 s;
7466    //   double *p;
7467    //   struct S2 *ps;
7468    //   int &ref;
7469    // }
7470    // S2 s;
7471    // S2 *ps;
7472    //
7473    // map(d)
7474    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7475    //
7476    // map(i)
7477    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7478    //
7479    // map(i[1:23])
7480    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7481    //
7482    // map(p)
7483    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7484    //
7485    // map(p[1:24])
7486    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7487    // in unified shared memory mode or for local pointers
7488    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7489    //
7490    // map(s)
7491    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7492    //
7493    // map(s.i)
7494    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7495    //
7496    // map(s.s.f)
7497    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7498    //
7499    // map(s.p)
7500    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7501    //
7502    // map(to: s.p[:22])
7503    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7504    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7505    // &(s.p), &(s.p[0]), 22*sizeof(double),
7506    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7507    // (*) alloc space for struct members, only this is a target parameter
7508    // (**) map the pointer (nothing to be mapped in this example) (the compiler
7509    //      optimizes this entry out, same in the examples below)
7510    // (***) map the pointee (map: to)
7511    //
7512    // map(to: s.ref)
7513    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7514    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7515    // (*) alloc space for struct members, only this is a target parameter
7516    // (**) map the pointer (nothing to be mapped in this example) (the compiler
7517    //      optimizes this entry out, same in the examples below)
7518    // (***) map the pointee (map: to)
7519    //
7520    // map(s.ps)
7521    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7522    //
7523    // map(from: s.ps->s.i)
7524    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7525    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7526    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7527    //
7528    // map(to: s.ps->ps)
7529    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7530    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7531    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7532    //
7533    // map(s.ps->ps->ps)
7534    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7535    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7536    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7537    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7538    //
7539    // map(to: s.ps->ps->s.f[:22])
7540    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7541    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7542    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7543    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7544    //
7545    // map(ps)
7546    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7547    //
7548    // map(ps->i)
7549    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7550    //
7551    // map(ps->s.f)
7552    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7553    //
7554    // map(from: ps->p)
7555    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7556    //
7557    // map(to: ps->p[:22])
7558    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7559    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7560    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7561    //
7562    // map(ps->ps)
7563    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7564    //
7565    // map(from: ps->ps->s.i)
7566    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7567    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7568    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7569    //
7570    // map(from: ps->ps->ps)
7571    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7572    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7573    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7574    //
7575    // map(ps->ps->ps->ps)
7576    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7577    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7578    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7579    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7580    //
7581    // map(to: ps->ps->ps->s.f[:22])
7582    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7583    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7584    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7585    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7586    //
7587    // map(to: s.f[:22]) map(from: s.p[:33])
7588    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7589    //     sizeof(double*) (*), TARGET_PARAM
7590    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7591    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7592    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7593    // (*) allocate contiguous space needed to fit all mapped members even if
7594    //     we allocate space for members not mapped (in this example,
7595    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7596    //     them as well because they fall between &s.f[0] and &s.p)
7597    //
7598    // map(from: s.f[:22]) map(to: ps->p[:33])
7599    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7600    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7601    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7602    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7603    // (*) the struct this entry pertains to is the 2nd element in the list of
7604    //     arguments, hence MEMBER_OF(2)
7605    //
7606    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7607    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7608    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7609    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7610    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7611    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7612    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7613    // (*) the struct this entry pertains to is the 4th element in the list
7614    //     of arguments, hence MEMBER_OF(4)
7615
7616    // Track if the map information being generated is the first for a capture.
7617    bool IsCaptureFirstInfo = IsFirstComponentList;
7618    // When the variable is on a declare target link or in a to clause with
7619    // unified memory, a reference is needed to hold the host/device address
7620    // of the variable.
7621    bool RequiresReference = false;
7622
7623    // Scan the components from the base to the complete expression.
7624    auto CI = Components.rbegin();
7625    auto CE = Components.rend();
7626    auto I = CI;
7627
7628    // Track if the map information being generated is the first for a list of
7629    // components.
7630    bool IsExpressionFirstInfo = true;
7631    bool FirstPointerInComplexData = false;
7632    Address BP = Address::invalid();
7633    const Expr *AssocExpr = I->getAssociatedExpression();
7634    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7635    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7636    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7637
7638    if (isa<MemberExpr>(AssocExpr)) {
7639      // The base is the 'this' pointer. The content of the pointer is going
7640      // to be the base of the field being mapped.
7641      BP = CGF.LoadCXXThisAddress();
7642    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7643               (OASE &&
7644                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7645      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7646    } else if (OAShE &&
7647               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7648      BP = Address(
7649          CGF.EmitScalarExpr(OAShE->getBase()),
7650          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7651    } else {
7652      // The base is the reference to the variable.
7653      // BP = &Var.
7654      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7655      if (const auto *VD =
7656              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7657        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7658                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7659          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7660              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7661               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7662            RequiresReference = true;
7663            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7664          }
7665        }
7666      }
7667
7668      // If the variable is a pointer and is being dereferenced (i.e. is not
7669      // the last component), the base has to be the pointer itself, not its
7670      // reference. References are ignored for mapping purposes.
7671      QualType Ty =
7672          I->getAssociatedDeclaration()->getType().getNonReferenceType();
7673      if (Ty->isAnyPointerType() && std::next(I) != CE) {
7674        // No need to generate individual map information for the pointer, it
7675        // can be associated with the combined storage if shared memory mode is
7676        // active or the base declaration is not global variable.
7677        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7678        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7679            !VD || VD->hasLocalStorage())
7680          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7681        else
7682          FirstPointerInComplexData = true;
7683        ++I;
7684      }
7685    }
7686
7687    // Track whether a component of the list should be marked as MEMBER_OF some
7688    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7689    // in a component list should be marked as MEMBER_OF, all subsequent entries
7690    // do not belong to the base struct. E.g.
7691    // struct S2 s;
7692    // s.ps->ps->ps->f[:]
7693    //   (1) (2) (3) (4)
7694    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7695    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7696    // is the pointee of ps(2) which is not member of struct s, so it should not
7697    // be marked as such (it is still PTR_AND_OBJ).
7698    // The variable is initialized to false so that PTR_AND_OBJ entries which
7699    // are not struct members are not considered (e.g. array of pointers to
7700    // data).
7701    bool ShouldBeMemberOf = false;
7702
7703    // Variable keeping track of whether or not we have encountered a component
7704    // in the component list which is a member expression. Useful when we have a
7705    // pointer or a final array section, in which case it is the previous
7706    // component in the list which tells us whether we have a member expression.
7707    // E.g. X.f[:]
7708    // While processing the final array section "[:]" it is "f" which tells us
7709    // whether we are dealing with a member of a declared struct.
7710    const MemberExpr *EncounteredME = nullptr;
7711
7712    // Track the total number of dimensions. Start from one for the dummy
7713    // dimension.
7714    uint64_t DimSize = 1;
7715
7716    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7717    bool IsPrevMemberReference = false;
7718
7719    for (; I != CE; ++I) {
7720      // If the current component is member of a struct (parent struct) mark it.
7721      if (!EncounteredME) {
7722        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7723        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7724        // as MEMBER_OF the parent struct.
7725        if (EncounteredME) {
7726          ShouldBeMemberOf = true;
7727          // Do not emit as complex pointer if this is actually not array-like
7728          // expression.
7729          if (FirstPointerInComplexData) {
7730            QualType Ty = std::prev(I)
7731                              ->getAssociatedDeclaration()
7732                              ->getType()
7733                              .getNonReferenceType();
7734            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7735            FirstPointerInComplexData = false;
7736          }
7737        }
7738      }
7739
7740      auto Next = std::next(I);
7741
7742      // We need to generate the addresses and sizes if this is the last
7743      // component, if the component is a pointer or if it is an array section
7744      // whose length can't be proved to be one. If this is a pointer, it
7745      // becomes the base address for the following components.
7746
7747    // A final array section is one whose length can't be proved to be one.
7748      // If the map item is non-contiguous then we don't treat any array section
7749      // as final array section.
7750      bool IsFinalArraySection =
7751          !IsNonContiguous &&
7752          isFinalArraySectionExpression(I->getAssociatedExpression());
7753
7754      // If we have a declaration for the mapping use that, otherwise use
7755      // the base declaration of the map clause.
7756      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7757                                     ? I->getAssociatedDeclaration()
7758                                     : BaseDecl;
7759      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7760                                               : MapExpr;
7761
7762      // Get information on whether the element is a pointer. Have to do a
7763      // special treatment for array sections given that they are built-in
7764      // types.
7765      const auto *OASE =
7766          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7767      const auto *OAShE =
7768          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7769      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7770      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7771      bool IsPointer =
7772          OAShE ||
7773          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7774                       .getCanonicalType()
7775                       ->isAnyPointerType()) ||
7776          I->getAssociatedExpression()->getType()->isAnyPointerType();
7777      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7778                               MapDecl &&
7779                               MapDecl->getType()->isLValueReferenceType();
7780      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7781
7782      if (OASE)
7783        ++DimSize;
7784
7785      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7786          IsFinalArraySection) {
7787        // If this is not the last component, we expect the pointer to be
7788        // associated with an array expression or member expression.
7789        assert((Next == CE ||
7790                isa<MemberExpr>(Next->getAssociatedExpression()) ||
7791                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7792                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7793                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7794                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7795                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7796               "Unexpected expression");
7797
7798        Address LB = Address::invalid();
7799        Address LowestElem = Address::invalid();
7800        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7801                                       const MemberExpr *E) {
7802          const Expr *BaseExpr = E->getBase();
7803          // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7804          // scalar.
7805          LValue BaseLV;
7806          if (E->isArrow()) {
7807            LValueBaseInfo BaseInfo;
7808            TBAAAccessInfo TBAAInfo;
7809            Address Addr =
7810                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7811            QualType PtrTy = BaseExpr->getType()->getPointeeType();
7812            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7813          } else {
7814            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7815          }
7816          return BaseLV;
7817        };
7818        if (OAShE) {
7819          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7820                                    CGF.getContext().getTypeAlignInChars(
7821                                        OAShE->getBase()->getType()));
7822        } else if (IsMemberReference) {
7823          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7824          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7825          LowestElem = CGF.EmitLValueForFieldInitialization(
7826                              BaseLVal, cast<FieldDecl>(MapDecl))
7827                           .getAddress(CGF);
7828          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7829                   .getAddress(CGF);
7830        } else {
7831          LowestElem = LB =
7832              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7833                  .getAddress(CGF);
7834        }
7835
7836        // If this component is a pointer inside the base struct then we don't
7837        // need to create any entry for it - it will be combined with the object
7838        // it is pointing to into a single PTR_AND_OBJ entry.
7839        bool IsMemberPointerOrAddr =
7840            EncounteredME &&
7841            (((IsPointer || ForDeviceAddr) &&
7842              I->getAssociatedExpression() == EncounteredME) ||
7843             (IsPrevMemberReference && !IsPointer) ||
7844             (IsMemberReference && Next != CE &&
7845              !Next->getAssociatedExpression()->getType()->isPointerType()));
7846        if (!OverlappedElements.empty() && Next == CE) {
7847          // Handle base element with the info for overlapped elements.
7848          assert(!PartialStruct.Base.isValid() && "The base element is set.");
7849          assert(!IsPointer &&
7850                 "Unexpected base element with the pointer type.");
7851          // Mark the whole struct as the struct that requires allocation on the
7852          // device.
7853          PartialStruct.LowestElem = {0, LowestElem};
7854          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7855              I->getAssociatedExpression()->getType());
7856          Address HB = CGF.Builder.CreateConstGEP(
7857              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7858                                                              CGF.VoidPtrTy),
7859              TypeSize.getQuantity() - 1);
7860          PartialStruct.HighestElem = {
7861              std::numeric_limits<decltype(
7862                  PartialStruct.HighestElem.first)>::max(),
7863              HB};
7864          PartialStruct.Base = BP;
7865          PartialStruct.LB = LB;
7866          assert(
7867              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7868              "Overlapped elements must be used only once for the variable.");
7869          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7870          // Emit data for non-overlapped data.
7871          OpenMPOffloadMappingFlags Flags =
7872              OMP_MAP_MEMBER_OF |
7873              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7874                             /*AddPtrFlag=*/false,
7875                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7876          llvm::Value *Size = nullptr;
7877          // Do bitcopy of all non-overlapped structure elements.
7878          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7879                   Component : OverlappedElements) {
7880            Address ComponentLB = Address::invalid();
7881            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7882                 Component) {
7883              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7884                const auto *FD = dyn_cast<FieldDecl>(VD);
7885                if (FD && FD->getType()->isLValueReferenceType()) {
7886                  const auto *ME =
7887                      cast<MemberExpr>(MC.getAssociatedExpression());
7888                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7889                  ComponentLB =
7890                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7891                          .getAddress(CGF);
7892                } else {
7893                  ComponentLB =
7894                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7895                          .getAddress(CGF);
7896                }
7897                Size = CGF.Builder.CreatePtrDiff(
7898                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7899                    CGF.EmitCastToVoidPtr(LB.getPointer()));
7900                break;
7901              }
7902            }
7903            assert(Size && "Failed to determine structure size");
7904            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7905            CombinedInfo.BasePointers.push_back(BP.getPointer());
7906            CombinedInfo.Pointers.push_back(LB.getPointer());
7907            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7908                Size, CGF.Int64Ty, /*isSigned=*/true));
7909            CombinedInfo.Types.push_back(Flags);
7910            CombinedInfo.Mappers.push_back(nullptr);
7911            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7912                                                                      : 1);
7913            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7914          }
7915          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7916          CombinedInfo.BasePointers.push_back(BP.getPointer());
7917          CombinedInfo.Pointers.push_back(LB.getPointer());
7918          Size = CGF.Builder.CreatePtrDiff(
7919              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7920              CGF.EmitCastToVoidPtr(LB.getPointer()));
7921          CombinedInfo.Sizes.push_back(
7922              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7923          CombinedInfo.Types.push_back(Flags);
7924          CombinedInfo.Mappers.push_back(nullptr);
7925          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7926                                                                    : 1);
7927          break;
7928        }
7929        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7930        if (!IsMemberPointerOrAddr ||
7931            (Next == CE && MapType != OMPC_MAP_unknown)) {
7932          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7933          CombinedInfo.BasePointers.push_back(BP.getPointer());
7934          CombinedInfo.Pointers.push_back(LB.getPointer());
7935          CombinedInfo.Sizes.push_back(
7936              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7937          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7938                                                                    : 1);
7939
7940          // If Mapper is valid, the last component inherits the mapper.
7941          bool HasMapper = Mapper && Next == CE;
7942          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7943
7944          // We need to add a pointer flag for each map that comes from the
7945          // same expression except for the first one. We also need to signal
7946          // this map is the first one that relates with the current capture
7947          // (there is a set of entries for each capture).
7948          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7949              MapType, MapModifiers, MotionModifiers, IsImplicit,
7950              !IsExpressionFirstInfo || RequiresReference ||
7951                  FirstPointerInComplexData || IsMemberReference,
7952              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7953
7954          if (!IsExpressionFirstInfo || IsMemberReference) {
7955            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7956            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7957            if (IsPointer || (IsMemberReference && Next != CE))
7958              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7959                         OMP_MAP_DELETE | OMP_MAP_CLOSE);
7960
7961            if (ShouldBeMemberOf) {
7962              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7963              // should be later updated with the correct value of MEMBER_OF.
7964              Flags |= OMP_MAP_MEMBER_OF;
7965              // From now on, all subsequent PTR_AND_OBJ entries should not be
7966              // marked as MEMBER_OF.
7967              ShouldBeMemberOf = false;
7968            }
7969          }
7970
7971          CombinedInfo.Types.push_back(Flags);
7972        }
7973
7974        // If we have encountered a member expression so far, keep track of the
7975        // mapped member. If the parent is "*this", then the value declaration
7976        // is nullptr.
7977        if (EncounteredME) {
7978          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7979          unsigned FieldIndex = FD->getFieldIndex();
7980
7981          // Update info about the lowest and highest elements for this struct
7982          if (!PartialStruct.Base.isValid()) {
7983            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7984            if (IsFinalArraySection) {
7985              Address HB =
7986                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7987                      .getAddress(CGF);
7988              PartialStruct.HighestElem = {FieldIndex, HB};
7989            } else {
7990              PartialStruct.HighestElem = {FieldIndex, LowestElem};
7991            }
7992            PartialStruct.Base = BP;
7993            PartialStruct.LB = BP;
7994          } else if (FieldIndex < PartialStruct.LowestElem.first) {
7995            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7996          } else if (FieldIndex > PartialStruct.HighestElem.first) {
7997            PartialStruct.HighestElem = {FieldIndex, LowestElem};
7998          }
7999        }
8000
8001        // Need to emit combined struct for array sections.
8002        if (IsFinalArraySection || IsNonContiguous)
8003          PartialStruct.IsArraySection = true;
8004
8005        // If we have a final array section, we are done with this expression.
8006        if (IsFinalArraySection)
8007          break;
8008
8009        // The pointer becomes the base for the next element.
8010        if (Next != CE)
8011          BP = IsMemberReference ? LowestElem : LB;
8012
8013        IsExpressionFirstInfo = false;
8014        IsCaptureFirstInfo = false;
8015        FirstPointerInComplexData = false;
8016        IsPrevMemberReference = IsMemberReference;
8017      } else if (FirstPointerInComplexData) {
8018        QualType Ty = Components.rbegin()
8019                          ->getAssociatedDeclaration()
8020                          ->getType()
8021                          .getNonReferenceType();
8022        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8023        FirstPointerInComplexData = false;
8024      }
8025    }
8026    // If ran into the whole component - allocate the space for the whole
8027    // record.
8028    if (!EncounteredME)
8029      PartialStruct.HasCompleteRecord = true;
8030
8031    if (!IsNonContiguous)
8032      return;
8033
8034    const ASTContext &Context = CGF.getContext();
8035
8036    // For supporting stride in array section, we need to initialize the first
8037    // dimension size as 1, first offset as 0, and first count as 1
8038    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8039    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8040    MapValuesArrayTy CurStrides;
8041    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8042    uint64_t ElementTypeSize;
8043
8044    // Collect Size information for each dimension and get the element size as
8045    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8046    // should be [10, 10] and the first stride is 4 bytes.
8047    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8048         Components) {
8049      const Expr *AssocExpr = Component.getAssociatedExpression();
8050      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8051
8052      if (!OASE)
8053        continue;
8054
8055      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8056      auto *CAT = Context.getAsConstantArrayType(Ty);
8057      auto *VAT = Context.getAsVariableArrayType(Ty);
8058
8059      // We need all the dimension sizes except for the last dimension.
8060      assert((VAT || CAT || &Component == &*Components.begin()) &&
8061             "Should be either ConstantArray or VariableArray if not the "
8062             "first Component");
8063
8064      // Get element size if CurStrides is empty.
8065      if (CurStrides.empty()) {
8066        const Type *ElementType = nullptr;
8067        if (CAT)
8068          ElementType = CAT->getElementType().getTypePtr();
8069        else if (VAT)
8070          ElementType = VAT->getElementType().getTypePtr();
8071        else
8072          assert(&Component == &*Components.begin() &&
8073                 "Only expect pointer (non CAT or VAT) when this is the "
8074                 "first Component");
8075        // If ElementType is null, then it means the base is a pointer
8076        // (neither CAT nor VAT) and we'll attempt to get ElementType again
8077        // for next iteration.
8078        if (ElementType) {
8079          // For the case that having pointer as base, we need to remove one
8080          // level of indirection.
8081          if (&Component != &*Components.begin())
8082            ElementType = ElementType->getPointeeOrArrayElementType();
8083          ElementTypeSize =
8084              Context.getTypeSizeInChars(ElementType).getQuantity();
8085          CurStrides.push_back(
8086              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8087        }
8088      }
8089      // Get dimension value except for the last dimension since we don't need
8090      // it.
8091      if (DimSizes.size() < Components.size() - 1) {
8092        if (CAT)
8093          DimSizes.push_back(llvm::ConstantInt::get(
8094              CGF.Int64Ty, CAT->getSize().getZExtValue()));
8095        else if (VAT)
8096          DimSizes.push_back(CGF.Builder.CreateIntCast(
8097              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8098              /*IsSigned=*/false));
8099      }
8100    }
8101
8102    // Skip the dummy dimension since we already have its information.
8103    auto DI = DimSizes.begin() + 1;
8104    // Product of dimension.
8105    llvm::Value *DimProd =
8106        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8107
8108    // Collect info for non-contiguous. Notice that offset, count, and stride
8109    // are only meaningful for array-section, so we insert a null for anything
8110    // other than array-section.
8111    // Also, the size of offset, count, and stride are not the same as
8112    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8113    // count, and stride are the same as the number of non-contiguous
8114    // declaration in target update to/from clause.
8115    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8116         Components) {
8117      const Expr *AssocExpr = Component.getAssociatedExpression();
8118
8119      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8120        llvm::Value *Offset = CGF.Builder.CreateIntCast(
8121            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8122            /*isSigned=*/false);
8123        CurOffsets.push_back(Offset);
8124        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8125        CurStrides.push_back(CurStrides.back());
8126        continue;
8127      }
8128
8129      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8130
8131      if (!OASE)
8132        continue;
8133
8134      // Offset
8135      const Expr *OffsetExpr = OASE->getLowerBound();
8136      llvm::Value *Offset = nullptr;
8137      if (!OffsetExpr) {
8138        // If offset is absent, then we just set it to zero.
8139        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8140      } else {
8141        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8142                                           CGF.Int64Ty,
8143                                           /*isSigned=*/false);
8144      }
8145      CurOffsets.push_back(Offset);
8146
8147      // Count
8148      const Expr *CountExpr = OASE->getLength();
8149      llvm::Value *Count = nullptr;
8150      if (!CountExpr) {
8151        // In Clang, once a high dimension is an array section, we construct all
8152        // the lower dimension as array section, however, for case like
8153        // arr[0:2][2], Clang construct the inner dimension as an array section
8154        // but it actually is not in an array section form according to spec.
8155        if (!OASE->getColonLocFirst().isValid() &&
8156            !OASE->getColonLocSecond().isValid()) {
8157          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8158        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to "(size -
          // lower-bound)/stride", where size is the size of the array
          // dimension.
8163          const Expr *StrideExpr = OASE->getStride();
8164          llvm::Value *Stride =
8165              StrideExpr
8166                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8167                                              CGF.Int64Ty, /*isSigned=*/false)
8168                  : nullptr;
8169          if (Stride)
8170            Count = CGF.Builder.CreateUDiv(
8171                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8172          else
8173            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8174        }
8175      } else {
8176        Count = CGF.EmitScalarExpr(CountExpr);
8177      }
8178      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8179      CurCounts.push_back(Count);
8180
8181      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8182      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8183      //              Offset      Count     Stride
8184      //    D0          0           1         4    (int)    <- dummy dimension
8185      //    D1          0           2         8    (2 * (1) * 4)
8186      //    D2          1           2         20   (1 * (1 * 5) * 4)
8187      //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8188      const Expr *StrideExpr = OASE->getStride();
8189      llvm::Value *Stride =
8190          StrideExpr
8191              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8192                                          CGF.Int64Ty, /*isSigned=*/false)
8193              : nullptr;
8194      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8195      if (Stride)
8196        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8197      else
8198        CurStrides.push_back(DimProd);
8199      if (DI != DimSizes.end())
8200        ++DI;
8201    }
8202
8203    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8204    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8205    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8206  }
8207
8208  /// Return the adjusted map modifiers if the declaration a capture refers to
8209  /// appears in a first-private clause. This is expected to be used only with
8210  /// directives that start with 'target'.
8211  MappableExprsHandler::OpenMPOffloadMappingFlags
8212  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8213    assert(Cap.capturesVariable() && "Expected capture by reference only!");
8214
8215    // A first private variable captured by reference will use only the
8216    // 'private ptr' and 'map to' flag. Return the right flags if the captured
8217    // declaration is known as first-private in this handler.
8218    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8219      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8220          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8221        return MappableExprsHandler::OMP_MAP_ALWAYS |
8222               MappableExprsHandler::OMP_MAP_TO;
8223      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8224        return MappableExprsHandler::OMP_MAP_TO |
8225               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8226      return MappableExprsHandler::OMP_MAP_PRIVATE |
8227             MappableExprsHandler::OMP_MAP_TO;
8228    }
8229    return MappableExprsHandler::OMP_MAP_TO |
8230           MappableExprsHandler::OMP_MAP_FROM;
8231  }
8232
  /// Encode the 1-based \p Position of a combined entry into the MEMBER_OF
  /// bitfield of the mapping flags.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the (Position + 1) value left by getFlagMemberOffset() bits so it
    // lands in the MEMBER_OF field of the flags.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
8238
8239  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8240                                     OpenMPOffloadMappingFlags MemberOfFlag) {
8241    // If the entry is PTR_AND_OBJ but has not been marked with the special
8242    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8243    // marked as MEMBER_OF.
8244    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8245        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8246      return;
8247
8248    // Reset the placeholder value to prepare the flag for the assignment of the
8249    // proper MEMBER_OF value.
8250    Flags &= ~OMP_MAP_MEMBER_OF;
8251    Flags |= MemberOfFlag;
8252  }
8253
8254  void getPlainLayout(const CXXRecordDecl *RD,
8255                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8256                      bool AsBase) const {
8257    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8258
8259    llvm::StructType *St =
8260        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8261
8262    unsigned NumElements = St->getNumElements();
8263    llvm::SmallVector<
8264        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8265        RecordLayout(NumElements);
8266
8267    // Fill bases.
8268    for (const auto &I : RD->bases()) {
8269      if (I.isVirtual())
8270        continue;
8271      const auto *Base = I.getType()->getAsCXXRecordDecl();
8272      // Ignore empty bases.
8273      if (Base->isEmpty() || CGF.getContext()
8274                                 .getASTRecordLayout(Base)
8275                                 .getNonVirtualSize()
8276                                 .isZero())
8277        continue;
8278
8279      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8280      RecordLayout[FieldIndex] = Base;
8281    }
8282    // Fill in virtual bases.
8283    for (const auto &I : RD->vbases()) {
8284      const auto *Base = I.getType()->getAsCXXRecordDecl();
8285      // Ignore empty bases.
8286      if (Base->isEmpty())
8287        continue;
8288      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8289      if (RecordLayout[FieldIndex])
8290        continue;
8291      RecordLayout[FieldIndex] = Base;
8292    }
8293    // Fill in all the fields.
8294    assert(!RD->isUnion() && "Unexpected union.");
8295    for (const auto *Field : RD->fields()) {
8296      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8297      // will fill in later.)
8298      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8299        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8300        RecordLayout[FieldIndex] = Field;
8301      }
8302    }
8303    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8304             &Data : RecordLayout) {
8305      if (Data.isNull())
8306        continue;
8307      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8308        getPlainLayout(Base, Layout, /*AsBase=*/true);
8309      else
8310        Layout.push_back(Data.get<const FieldDecl *>());
8311    }
8312  }
8313
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array. Declarations in \a SkipVarSet are
  /// ignored entirely.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Each declaration maps to 'Total' buckets, processed in order: entries
    // with the 'present' modifier first, then 'alloc' entries, then the rest.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          // Create the per-kind buckets for this declaration on first use.
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Fill the information map from the map clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // Only forward the variable reference when the clause carries a valid
        // map location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Fill the information map from the 'to' motion clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Fill the information map from the 'from' motion clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases where
            // the base pointer is mapped as array subscript, array section or
            // array shaping. The base address is passed as a pointer to base in
            // this case and cannot be used as a base for use_device_ptr list
            // item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // Not a struct member: emit a RETURN_PARAM entry with the current
          // pointer value and zero size right away.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        // Handle each declaration at most once.
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Not a struct member: emit the RETURN_PARAM entry directly. Use the
          // address for glvalues and the evaluated value otherwise.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Emit the collected map information, one declaration at a time, so the
    // per-declaration flags (MEMBER_OF, partial structs, ...) are computed
    // over all of its component lists at once.
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }
8667
8668public:
8669  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8670      : CurDir(&Dir), CGF(CGF) {
8671    // Extract firstprivate clause information.
8672    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8673      for (const auto *D : C->varlists())
8674        FirstPrivateDecls.try_emplace(
8675            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8676    // Extract implicit firstprivates from uses_allocators clauses.
8677    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8678      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8679        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8680        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8681          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8682                                        /*Implicit=*/true);
8683        else if (const auto *VD = dyn_cast<VarDecl>(
8684                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8685                         ->getDecl()))
8686          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8687      }
8688    }
8689    // Extract device pointer clause information.
8690    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8691      for (auto L : C->component_lists())
8692        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8693  }
8694
  /// Constructor for the declare mapper directive. Only the current directive
  /// is recorded; no clause pre-processing is needed in this case.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8698
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo [out] receives the extra combined entry.
  /// \param CurTypes map flags of the entries generated for the struct
  ///        members; updated in place with the MEMBER_OF information.
  /// \param PartialStruct lowest/highest mapped elements of the struct.
  /// \param VD the mapped declaration, if any.
  /// \param NotTargetParams if true, the combined entry is not flagged as a
  ///        target parameter.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that carries no MEMBER_OF placeholder and is not an
    // array section needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // NOTE(review): with LB == HB the size computed below becomes the size of
    // a single element of LB's pointee type — presumably the complete record;
    // confirm against StructRangeInfoTy's invariants.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8755
8756  /// Generate all the base pointers, section pointers, sizes, map types, and
8757  /// mappers for the extracted mappable expressions (all included in \a
8758  /// CombinedInfo). Also, for each item that relates with a device pointer, a
8759  /// pair of the relevant declaration and index where it occurs is appended to
8760  /// the device pointers info array.
8761  void generateAllInfo(
8762      MapCombinedInfoTy &CombinedInfo,
8763      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8764          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8765    assert(CurDir.is<const OMPExecutableDirective *>() &&
8766           "Expect a executable directive");
8767    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8768    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8769  }
8770
8771  /// Generate all the base pointers, section pointers, sizes, map types, and
8772  /// mappers for the extracted map clauses of user-defined mapper (all included
8773  /// in \a CombinedInfo).
8774  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8775    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8776           "Expect a declare mapper directive");
8777    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8778    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8779  }
8780
8781  /// Emit capture info for lambdas for variables captured by reference.
8782  void generateInfoForLambdaCaptures(
8783      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8784      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8785    const auto *RD = VD->getType()
8786                         .getCanonicalType()
8787                         .getNonReferenceType()
8788                         ->getAsCXXRecordDecl();
8789    if (!RD || !RD->isLambda())
8790      return;
8791    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8792    LValue VDLVal = CGF.MakeAddrLValue(
8793        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8794    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8795    FieldDecl *ThisCapture = nullptr;
8796    RD->getCaptureFields(Captures, ThisCapture);
8797    if (ThisCapture) {
8798      LValue ThisLVal =
8799          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8800      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8801      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8802                                 VDLVal.getPointer(CGF));
8803      CombinedInfo.Exprs.push_back(VD);
8804      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8805      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8806      CombinedInfo.Sizes.push_back(
8807          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8808                                    CGF.Int64Ty, /*isSigned=*/true));
8809      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8810                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8811      CombinedInfo.Mappers.push_back(nullptr);
8812    }
8813    for (const LambdaCapture &LC : RD->captures()) {
8814      if (!LC.capturesVariable())
8815        continue;
8816      const VarDecl *VD = LC.getCapturedVar();
8817      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8818        continue;
8819      auto It = Captures.find(VD);
8820      assert(It != Captures.end() && "Found lambda capture without field.");
8821      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8822      if (LC.getCaptureKind() == LCK_ByRef) {
8823        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8824        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8825                                   VDLVal.getPointer(CGF));
8826        CombinedInfo.Exprs.push_back(VD);
8827        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8828        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8829        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8830            CGF.getTypeSize(
8831                VD->getType().getCanonicalType().getNonReferenceType()),
8832            CGF.Int64Ty, /*isSigned=*/true));
8833      } else {
8834        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8835        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8836                                   VDLVal.getPointer(CGF));
8837        CombinedInfo.Exprs.push_back(VD);
8838        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8839        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8840        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8841      }
8842      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8843                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8844      CombinedInfo.Mappers.push_back(nullptr);
8845    }
8846  }
8847
8848  /// Set correct indices for lambdas captures.
8849  void adjustMemberOfForLambdaCaptures(
8850      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8851      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8852      MapFlagsArrayTy &Types) const {
8853    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8854      // Set correct member_of idx for all implicit lambda captures.
8855      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8856                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8857        continue;
8858      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8859      assert(BasePtr && "Unable to find base lambda address.");
8860      int TgtIdx = -1;
8861      for (unsigned J = I; J > 0; --J) {
8862        unsigned Idx = J - 1;
8863        if (Pointers[Idx] != BasePtr)
8864          continue;
8865        TgtIdx = Idx;
8866        break;
8867      }
8868      assert(TgtIdx != -1 && "Unable to find parent lambda.");
8869      // All other current entries will be MEMBER_OF the combined entry
8870      // (except for PTR_AND_OBJ entries which do not have a placeholder value
8871      // 0xFFFF in the MEMBER_OF field).
8872      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8873      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8874    }
8875  }
8876
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap           Capture of the target region being lowered.
  /// \param Arg           IR value the capture was lowered to.
  /// \param CombinedInfo  Output arrays; new entries are appended.
  /// \param PartialStruct Accumulates the range of mapped members so a single
  ///                      combined entry for the enclosing struct can be
  ///                      emitted afterwards.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component ('this' captures carry no declaration, so VD is null then).
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      // Size is always the size of a pointer.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Collect, across all map clauses of the directive, every component list
    // mentioning this declaration together with the clause's map type,
    // modifiers, implicitness, mapper, and variable reference expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists so that entries with the 'present' modifier come first
    // and 'alloc' entries come last. Note that MapType is deliberately read
    // from RHS first (and then LHS): HasAllocs refers to RHS, HasAllocsR to
    // LHS.
    // NOTE(review): for an element that is both 'present' and 'alloc' this
    // comparator may not form a strict weak ordering — worth confirming.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    // Maps each base component list to the component lists that map a proper
    // sub-object of it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      // Compare against every later list. Component lists are walked in
      // reverse, i.e. from the base object outwards.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter list maps the base object; the longer one maps one of
          // its sub-objects.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays to reach the underlying record type whose field
      // layout determines the order of the overlapped sub-objects.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each overlap list by declaration order of the referenced fields.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the position of the fields within the record.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) classes: use the flattened layout.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9108
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// The resulting entry is always marked as a target parameter, and is
  /// additionally flagged implicit unless an explicit firstprivate clause
  /// says otherwise (looked up in FirstPrivateDecls).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with the size of the pointee.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // An explicit firstprivate clause overrides implicitness.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      // Note: Sizes is pushed before the other arrays here; Sizes.back() is
      // reused for the memcpy below.
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables get a registered global copy that
        // is used as the base pointer instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointers: pass the loaded pointer value, not the
          // address of the reference.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9196};
9197} // anonymous namespace
9198
/// Emit, for every non-contiguous entry in \a CombinedInfo, an on-stack array
/// of per-dimension descriptors (offset/count/stride) and store its address
/// into the corresponding slot of the offloading pointers array.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty); // offset
  addFieldToRecordDecl(C, RD, Int64Ty); // count
  addFieldToRecordDecl(C, RD, Int64Ty); // stride
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices within descriptor_dim.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    // Fill in one descriptor per dimension; the collected per-dimension info
    // is written out in reverse order (RevIdx walks the dimensions backwards).
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
9266
9267/// Emit a string constant containing the names of the values mapped to the
9268/// offloading runtime library.
9269llvm::Constant *
9270emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9271                       MappableExprsHandler::MappingExprInfo &MapExprs) {
9272  llvm::Constant *SrcLocStr;
9273  if (!MapExprs.getMapDecl()) {
9274    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9275  } else {
9276    std::string ExprName = "";
9277    if (MapExprs.getMapExpr()) {
9278      PrintingPolicy P(CGF.getContext().getLangOpts());
9279      llvm::raw_string_ostream OS(ExprName);
9280      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9281      OS.flush();
9282    } else {
9283      ExprName = MapExprs.getMapDecl()->getNameAsString();
9284    }
9285
9286    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9287    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9288    const char *FileName = PLoc.getFilename();
9289    unsigned Line = PLoc.getLine();
9290    unsigned Column = PLoc.getColumn();
9291    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9292                                                Line, Column);
9293  }
9294  return SrcLocStr;
9295}
9296
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param IsNonContiguous If set, sizes for OMP_MAP_NON_CONTIG entries hold
///        the dimension count and per-dimension descriptors are also emitted.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack temporaries for base pointers, pointers and mappers; filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        // Non-contiguous entries store the number of dimensions instead of a
        // byte size.
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill the base pointer, pointer, (runtime) size and mapper slots for
    // every entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr declaration's address was stored.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored at runtime when at least one is non-constant;
      // otherwise the constant global built above is used.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit per-dimension descriptors for non-contiguous transfers, if any.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9471
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map-types array intended for the end of the region
  /// (Info.MapTypesArrayEnd, where 'present' modifiers were cleared) instead
  /// of the one for the beginning.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9480
9481/// Emit the arguments to be passed to the runtime library based on the
9482/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9483/// ForEndCall, emit map types to be passed for the end of the region instead of
9484/// the beginning.
9485static void emitOffloadingArraysArgument(
9486    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9487    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9488    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9489    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9490    const ArgumentsOptions &Options = ArgumentsOptions()) {
9491  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9492         "expected region end call to runtime only when end call is separate");
9493  CodeGenModule &CGM = CGF.CGM;
9494  if (Info.NumberOfPtrs) {
9495    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9496        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9497        Info.BasePointersArray,
9498        /*Idx0=*/0, /*Idx1=*/0);
9499    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9500        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9501        Info.PointersArray,
9502        /*Idx0=*/0,
9503        /*Idx1=*/0);
9504    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9505        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9506        /*Idx0=*/0, /*Idx1=*/0);
9507    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9508        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9509        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9510                                                    : Info.MapTypesArray,
9511        /*Idx0=*/0,
9512        /*Idx1=*/0);
9513
9514    // Only emit the mapper information arrays if debug information is
9515    // requested.
9516    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9517      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9518    else
9519      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9520          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9521          Info.MapNamesArray,
9522          /*Idx0=*/0,
9523          /*Idx1=*/0);
9524    // If there is no user-defined mapper, set the mapper array to nullptr to
9525    // avoid an unnecessary data privatization
9526    if (!Info.HasMapper)
9527      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9528    else
9529      MappersArrayArg =
9530          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9531  } else {
9532    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9533    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9534    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9535    MapTypesArrayArg =
9536        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9537    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9538    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9539  }
9540}
9541
/// Check for inner distribute directive.
/// Returns the distribute directive nested inside \p D (a target-style
/// directive), or nullptr if no such directive is present.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a possible compound statement wrapper to the single nested
  // statement, if any.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' can wrap 'distribute' directly, or wrap 'teams' which in
      // turn wraps 'distribute'; descend one more level in the latter case.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute directive.
      return nullptr;
    // All remaining kinds either already combine distribute or are not target
    // directives at all, so this function must not be called on them.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9649
9650/// Emit the user-defined mapper function. The code generation follows the
9651/// pattern in the example below.
9652/// \code
9653/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9654///                                           void *base, void *begin,
9655///                                           int64_t size, int64_t type,
9656///                                           void *name = nullptr) {
9657///   // Allocate space for an array section first or add a base/begin for
9658///   // pointer dereference.
9659///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9660///       !maptype.IsDelete)
9661///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9662///                                 size*sizeof(Ty), clearToFromMember(type));
9663///   // Map members.
9664///   for (unsigned i = 0; i < size; i++) {
9665///     // For each component specified by this mapper:
9666///     for (auto c : begin[i]->all_components) {
9667///       if (c.hasMapper())
9668///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9669///                       c.arg_type, c.arg_name);
9670///       else
9671///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9672///                                     c.arg_begin, c.arg_size, c.arg_type,
9673///                                     c.arg_name);
9674///     }
9675///   }
9676///   // Delete the array section.
9677///   if (size > 1 && maptype.IsDelete)
9678///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9679///                                 size*sizeof(Ty), clearToFromMember(type));
9680/// }
9681/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper at most once; later requests reuse the cached function.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Elements are traversed through a restrict-qualified pointer to Ty.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' construct; it is privatized
  // below so that map clauses evaluate against the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the mapper after the mangled element type and the mapper identifier
  // so distinct mappers never collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized regardless of -O0 attributes.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer; the back-edge incoming value is
  // added after the body is emitted (see PtrNext below).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bits of the map type so nested
  // components are attributed to the right parent entry.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Map names are only emitted when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible map types; the tofrom path arrives via the
    // fall-through edge from ToElseBB.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    // Remember that this function triggered the mapper emission so the
    // association can be cleaned up when codegen of CGF's function finishes.
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9928
9929/// Emit the array initialization or deletion portion for user-defined mapper
9930/// code generation. First, it evaluates whether an array section is mapped and
9931/// whether the \a MapType instructs to delete this section. If \a IsInit is
9932/// true, and \a MapType indicates to not delete this array, array
9933/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
9935void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9936    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9937    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9938    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9939    bool IsInit) {
9940  StringRef Prefix = IsInit ? ".init" : ".del";
9941
9942  // Evaluate if this is an array section.
9943  llvm::BasicBlock *BodyBB =
9944      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9945  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9946      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9947  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9948      MapType,
9949      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9950  llvm::Value *DeleteCond;
9951  llvm::Value *Cond;
9952  if (IsInit) {
9953    // base != begin?
9954    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
9955        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
9956    // IsPtrAndObj?
9957    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9958        MapType,
9959        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
9960    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9961    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9962    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9963    DeleteCond = MapperCGF.Builder.CreateIsNull(
9964        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9965  } else {
9966    Cond = IsArray;
9967    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9968        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9969  }
9970  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9971  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9972
9973  MapperCGF.EmitBlock(BodyBB);
9974  // Get the array size by multiplying element size and element number (i.e., \p
9975  // Size).
9976  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9977      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9978  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9979  // memory allocation/deletion purpose only.
9980  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9981      MapType,
9982      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9983                                   MappableExprsHandler::OMP_MAP_FROM)));
9984  MapTypeArg = MapperCGF.Builder.CreateOr(
9985      MapTypeArg,
9986      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
9987
9988  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9989  // data structure.
9990  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
9991                                   ArraySize, MapTypeArg, MapName};
9992  MapperCGF.EmitRuntimeCall(
9993      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9994                                            OMPRTL___tgt_push_mapper_component),
9995      OffloadingArgs);
9996}
9997
9998llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9999    const OMPDeclareMapperDecl *D) {
10000  auto I = UDMMap.find(D);
10001  if (I != UDMMap.end())
10002    return I->second;
10003  emitUserDefinedMapper(D);
10004  return UDMMap.lookup(D);
10005}
10006
10007void CGOpenMPRuntime::emitTargetNumIterationsCall(
10008    CodeGenFunction &CGF, const OMPExecutableDirective &D,
10009    llvm::Value *DeviceID,
10010    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10011                                     const OMPLoopDirective &D)>
10012        SizeEmitter) {
10013  OpenMPDirectiveKind Kind = D.getDirectiveKind();
10014  const OMPExecutableDirective *TD = &D;
10015  // Get nested teams distribute kind directive, if any.
10016  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10017    TD = getNestedDistributeDirective(CGM.getContext(), D);
10018  if (!TD)
10019    return;
10020  const auto *LD = cast<OMPLoopDirective>(TD);
10021  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10022                                                         PrePostActionTy &) {
10023    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10024      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10025      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10026      CGF.EmitRuntimeCall(
10027          OMPBuilder.getOrCreateRuntimeFunction(
10028              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10029          Args);
10030    }
10031  };
10032  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10033}
10034
10035void CGOpenMPRuntime::emitTargetCall(
10036    CodeGenFunction &CGF, const OMPExecutableDirective &D,
10037    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10038    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10039    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10040                                     const OMPLoopDirective &D)>
10041        SizeEmitter) {
10042  if (!CGF.HaveInsertPoint())
10043    return;
10044
10045  assert(OutlinedFn && "Invalid outlined function!");
10046
10047  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10048                                 D.hasClausesOfKind<OMPNowaitClause>();
10049  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10050  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10051  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10052                                            PrePostActionTy &) {
10053    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10054  };
10055  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10056
10057  CodeGenFunction::OMPTargetDataInfo InputInfo;
10058  llvm::Value *MapTypesArray = nullptr;
10059  llvm::Value *MapNamesArray = nullptr;
10060  // Fill up the pointer arrays and transfer execution to the device.
10061  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10062                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10063                    &CapturedVars,
10064                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10065    if (Device.getInt() == OMPC_DEVICE_ancestor) {
10066      // Reverse offloading is not supported, so just execute on the host.
10067      if (RequiresOuterTask) {
10068        CapturedVars.clear();
10069        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10070      }
10071      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10072      return;
10073    }
10074
10075    // On top of the arrays that were filled up, the target offloading call
10076    // takes as arguments the device id as well as the host pointer. The host
10077    // pointer is used by the runtime library to identify the current target
10078    // region, so it only has to be unique and not necessarily point to
10079    // anything. It could be the pointer to the outlined function that
10080    // implements the target region, but we aren't using that so that the
10081    // compiler doesn't need to keep that, and could therefore inline the host
10082    // function if proven worthwhile during optimization.
10083
10084    // From this point on, we need to have an ID of the target region defined.
10085    assert(OutlinedFnID && "Invalid outlined function ID!");
10086
10087    // Emit device ID if any.
10088    llvm::Value *DeviceID;
10089    if (Device.getPointer()) {
10090      assert((Device.getInt() == OMPC_DEVICE_unknown ||
10091              Device.getInt() == OMPC_DEVICE_device_num) &&
10092             "Expected device_num modifier.");
10093      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10094      DeviceID =
10095          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10096    } else {
10097      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10098    }
10099
10100    // Emit the number of elements in the offloading arrays.
10101    llvm::Value *PointerNum =
10102        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10103
10104    // Return value of the runtime offloading call.
10105    llvm::Value *Return;
10106
10107    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10108    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10109
10110    // Source location for the ident struct
10111    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10112
10113    // Emit tripcount for the target loop-based directive.
10114    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10115
10116    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10117    // The target region is an outlined function launched by the runtime
10118    // via calls __tgt_target() or __tgt_target_teams().
10119    //
10120    // __tgt_target() launches a target region with one team and one thread,
10121    // executing a serial region.  This master thread may in turn launch
10122    // more threads within its team upon encountering a parallel region,
10123    // however, no additional teams can be launched on the device.
10124    //
10125    // __tgt_target_teams() launches a target region with one or more teams,
10126    // each with one or more threads.  This call is required for target
10127    // constructs such as:
10128    //  'target teams'
10129    //  'target' / 'teams'
10130    //  'target teams distribute parallel for'
10131    //  'target parallel'
10132    // and so on.
10133    //
10134    // Note that on the host and CPU targets, the runtime implementation of
10135    // these calls simply call the outlined function without forking threads.
10136    // The outlined functions themselves have runtime calls to
10137    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10138    // the compiler in emitTeamsCall() and emitParallelCall().
10139    //
10140    // In contrast, on the NVPTX target, the implementation of
10141    // __tgt_target_teams() launches a GPU kernel with the requested number
10142    // of teams and threads so no additional calls to the runtime are required.
10143    if (NumTeams) {
10144      // If we have NumTeams defined this means that we have an enclosed teams
10145      // region. Therefore we also expect to have NumThreads defined. These two
10146      // values should be defined in the presence of a teams directive,
10147      // regardless of having any clauses associated. If the user is using teams
10148      // but no clauses, these two values will be the default that should be
10149      // passed to the runtime library - a 32-bit integer with the value zero.
10150      assert(NumThreads && "Thread limit expression should be available along "
10151                           "with number of teams.");
10152      llvm::Value *OffloadingArgs[] = {RTLoc,
10153                                       DeviceID,
10154                                       OutlinedFnID,
10155                                       PointerNum,
10156                                       InputInfo.BasePointersArray.getPointer(),
10157                                       InputInfo.PointersArray.getPointer(),
10158                                       InputInfo.SizesArray.getPointer(),
10159                                       MapTypesArray,
10160                                       MapNamesArray,
10161                                       InputInfo.MappersArray.getPointer(),
10162                                       NumTeams,
10163                                       NumThreads};
10164      Return = CGF.EmitRuntimeCall(
10165          OMPBuilder.getOrCreateRuntimeFunction(
10166              CGM.getModule(), HasNowait
10167                                   ? OMPRTL___tgt_target_teams_nowait_mapper
10168                                   : OMPRTL___tgt_target_teams_mapper),
10169          OffloadingArgs);
10170    } else {
10171      llvm::Value *OffloadingArgs[] = {RTLoc,
10172                                       DeviceID,
10173                                       OutlinedFnID,
10174                                       PointerNum,
10175                                       InputInfo.BasePointersArray.getPointer(),
10176                                       InputInfo.PointersArray.getPointer(),
10177                                       InputInfo.SizesArray.getPointer(),
10178                                       MapTypesArray,
10179                                       MapNamesArray,
10180                                       InputInfo.MappersArray.getPointer()};
10181      Return = CGF.EmitRuntimeCall(
10182          OMPBuilder.getOrCreateRuntimeFunction(
10183              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10184                                         : OMPRTL___tgt_target_mapper),
10185          OffloadingArgs);
10186    }
10187
10188    // Check the error code and execute the host version if required.
10189    llvm::BasicBlock *OffloadFailedBlock =
10190        CGF.createBasicBlock("omp_offload.failed");
10191    llvm::BasicBlock *OffloadContBlock =
10192        CGF.createBasicBlock("omp_offload.cont");
10193    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10194    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10195
10196    CGF.EmitBlock(OffloadFailedBlock);
10197    if (RequiresOuterTask) {
10198      CapturedVars.clear();
10199      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10200    }
10201    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10202    CGF.EmitBranch(OffloadContBlock);
10203
10204    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10205  };
10206
10207  // Notify that the host version must be executed.
10208  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10209                    RequiresOuterTask](CodeGenFunction &CGF,
10210                                       PrePostActionTy &) {
10211    if (RequiresOuterTask) {
10212      CapturedVars.clear();
10213      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10214    }
10215    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10216  };
10217
10218  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10219                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
10220                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10221    // Fill up the arrays with all the captured variables.
10222    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10223
10224    // Get mappable expression information.
10225    MappableExprsHandler MEHandler(D, CGF);
10226    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10227    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10228
10229    auto RI = CS.getCapturedRecordDecl()->field_begin();
10230    auto *CV = CapturedVars.begin();
10231    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10232                                              CE = CS.capture_end();
10233         CI != CE; ++CI, ++RI, ++CV) {
10234      MappableExprsHandler::MapCombinedInfoTy CurInfo;
10235      MappableExprsHandler::StructRangeInfoTy PartialStruct;
10236
10237      // VLA sizes are passed to the outlined region by copy and do not have map
10238      // information associated.
10239      if (CI->capturesVariableArrayType()) {
10240        CurInfo.Exprs.push_back(nullptr);
10241        CurInfo.BasePointers.push_back(*CV);
10242        CurInfo.Pointers.push_back(*CV);
10243        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10244            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10245        // Copy to the device as an argument. No need to retrieve it.
10246        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10247                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10248                                MappableExprsHandler::OMP_MAP_IMPLICIT);
10249        CurInfo.Mappers.push_back(nullptr);
10250      } else {
10251        // If we have any information in the map clause, we use it, otherwise we
10252        // just do a default mapping.
10253        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10254        if (!CI->capturesThis())
10255          MappedVarSet.insert(CI->getCapturedVar());
10256        else
10257          MappedVarSet.insert(nullptr);
10258        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10259          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10260        // Generate correct mapping for variables captured by reference in
10261        // lambdas.
10262        if (CI->capturesVariable())
10263          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10264                                                  CurInfo, LambdaPointers);
10265      }
10266      // We expect to have at least an element of information for this capture.
10267      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10268             "Non-existing map pointer for capture!");
10269      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10270             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10271             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10272             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10273             "Inconsistent map information sizes!");
10274
10275      // If there is an entry in PartialStruct it means we have a struct with
10276      // individual members mapped. Emit an extra combined entry.
10277      if (PartialStruct.Base.isValid()) {
10278        CombinedInfo.append(PartialStruct.PreliminaryMapData);
10279        MEHandler.emitCombinedEntry(
10280            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10281            !PartialStruct.PreliminaryMapData.BasePointers.empty());
10282      }
10283
10284      // We need to append the results of this capture to what we already have.
10285      CombinedInfo.append(CurInfo);
10286    }
10287    // Adjust MEMBER_OF flags for the lambdas captures.
10288    MEHandler.adjustMemberOfForLambdaCaptures(
10289        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10290        CombinedInfo.Types);
10291    // Map any list items in a map clause that were not captures because they
10292    // weren't referenced within the construct.
10293    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10294
10295    TargetDataInfo Info;
10296    // Fill up the arrays and create the arguments.
10297    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10298    emitOffloadingArraysArgument(
10299        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10300        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10301        {/*ForEndTask=*/false});
10302
10303    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10304    InputInfo.BasePointersArray =
10305        Address(Info.BasePointersArray, CGM.getPointerAlign());
10306    InputInfo.PointersArray =
10307        Address(Info.PointersArray, CGM.getPointerAlign());
10308    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10309    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10310    MapTypesArray = Info.MapTypesArray;
10311    MapNamesArray = Info.MapNamesArray;
10312    if (RequiresOuterTask)
10313      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10314    else
10315      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10316  };
10317
10318  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10319                             CodeGenFunction &CGF, PrePostActionTy &) {
10320    if (RequiresOuterTask) {
10321      CodeGenFunction::OMPTargetDataInfo InputInfo;
10322      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10323    } else {
10324      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10325    }
10326  };
10327
10328  // If we have a target function ID it means that we need to support
10329  // offloading, otherwise, just execute on the host. We need to execute on host
10330  // regardless of the conditional in the if clause if, e.g., the user do not
10331  // specify target triples.
10332  if (OutlinedFnID) {
10333    if (IfCond) {
10334      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10335    } else {
10336      RegionCodeGenTy ThenRCG(TargetThenGen);
10337      ThenRCG(CGF);
10338    }
10339  } else {
10340    RegionCodeGenTy ElseRCG(TargetElseGen);
10341    ElseRCG(CGF);
10342  }
10343}
10344
/// Recursively walk \p S, emitting a device entry point for every OpenMP
/// target directive found. \p ParentName is the mangled name of the enclosing
/// host function (or ctor/dtor) and is used to build the unique
/// offload-entry name for each target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (DeviceID, FileID, Line) triple uniquely identifies this target
    // region across host and device compilations.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the emitter for this specific target directive kind; each
    // one produces the outlined device function for the region.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the following kinds is a target execution directive, so
    // reaching any of them here would mean the RequiresDeviceCodegen check
    // above and this switch disagree.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other OpenMP directive only the raw associated statement needs to
  // be scanned; the directive itself contributes no target entry point.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10493
10494static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10495  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10496      OMPDeclareTargetDeclAttr::getDeviceType(VD);
10497  if (!DevTy)
10498    return false;
10499  // Do not emit device_type(nohost) functions for the host.
10500  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10501    return true;
10502  // Do not emit device_type(host) functions for the device.
10503  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10504    return true;
10505  return false;
10506}
10507
10508bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10509  // If emitting code for the host, we do not process FD here. Instead we do
10510  // the normal code generation.
10511  if (!CGM.getLangOpts().OpenMPIsDevice) {
10512    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10513      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10514                                  CGM.getLangOpts().OpenMPIsDevice))
10515        return true;
10516    return false;
10517  }
10518
10519  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10520  // Try to detect target regions in the function.
10521  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10522    StringRef Name = CGM.getMangledName(GD);
10523    scanForTargetRegionsFunctions(FD->getBody(), Name);
10524    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10525                                CGM.getLangOpts().OpenMPIsDevice))
10526      return true;
10527  }
10528
10529  // Do not to emit function if it is not marked as declare target.
10530  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10531         AlreadyEmittedTargetDecls.count(VD) == 0;
10532}
10533
/// Decides whether emission of the global variable \p GD must be suppressed
/// (returns true) for the current compilation, scanning any ctors/dtors of
/// its type for nested target regions on the device side.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip entirely if a device_type modifier excludes this compilation.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host, globals always follow the normal codegen path.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    // 'link' variables (and 'to' under unified shared memory) are emitted
    // lazily at the end of the TU via emitDeferredTargetDecls.
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10571
10572llvm::Constant *
10573CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10574                                                const VarDecl *VD) {
10575  assert(VD->getType().isConstant(CGM.getContext()) &&
10576         "Expected constant variable.");
10577  StringRef VarName;
10578  llvm::Constant *Addr;
10579  llvm::GlobalValue::LinkageTypes Linkage;
10580  QualType Ty = VD->getType();
10581  SmallString<128> Buffer;
10582  {
10583    unsigned DeviceID;
10584    unsigned FileID;
10585    unsigned Line;
10586    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10587                             FileID, Line);
10588    llvm::raw_svector_ostream OS(Buffer);
10589    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10590       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10591    VarName = OS.str();
10592  }
10593  Linkage = llvm::GlobalValue::InternalLinkage;
10594  Addr =
10595      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10596                                  getDefaultFirstprivateAddressSpace());
10597  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10598  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10599  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10600  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10601      VarName, Addr, VarSize,
10602      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10603  return Addr;
10604}
10605
/// Registers the declare-target variable \p VD (with address \p Addr) in the
/// offload-entries table so host and device agree on its entry. Non-target
/// variables emitted on the device are recorded separately.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when compiling for the host with no device triples.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  // 'to' without unified shared memory: the variable itself is the entry.
  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size zero marks the entry as a reference to a
      // definition elsewhere.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // The constant ref-variable's initializer points at the original, so
        // 'compiler used' keeps the internal variable from being optimized
        // away.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link' (or 'to' with unified shared memory): register the pointer-sized
    // reference variable instead of the data itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name is needed for the entry.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10687
10688bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10689  if (isa<FunctionDecl>(GD.getDecl()) ||
10690      isa<OMPDeclareReductionDecl>(GD.getDecl()))
10691    return emitTargetFunctions(GD);
10692
10693  return emitTargetGlobalVariable(GD);
10694}
10695
10696void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10697  for (const VarDecl *VD : DeferredGlobalVariables) {
10698    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10699        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10700    if (!Res)
10701      continue;
10702    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10703        !HasRequiresUnifiedSharedMemory) {
10704      CGM.EmitGlobal(VD);
10705    } else {
10706      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10707              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10708               HasRequiresUnifiedSharedMemory)) &&
10709             "Expected link clause or to clause with unified memory.");
10710      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10711    }
10712  }
10713}
10714
10715void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10716    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10717  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10718         " Expected target-based directive.");
10719}
10720
10721void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10722  for (const OMPClause *Clause : D->clauselists()) {
10723    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10724      HasRequiresUnifiedSharedMemory = true;
10725    } else if (const auto *AC =
10726                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10727      switch (AC->getAtomicDefaultMemOrderKind()) {
10728      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10729        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10730        break;
10731      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10732        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10733        break;
10734      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10735        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10736        break;
10737      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10738        break;
10739      }
10740    }
10741  }
10742}
10743
/// Returns the default ordering for 'atomic' constructs, as set by a
/// 'requires atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10747
10748bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10749                                                       LangAS &AS) {
10750  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10751    return false;
10752  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10753  switch(A->getAllocatorType()) {
10754  case OMPAllocateDeclAttr::OMPNullMemAlloc:
10755  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10756  // Not supported, fallback to the default mem space.
10757  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10758  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10759  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10760  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10761  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10762  case OMPAllocateDeclAttr::OMPConstMemAlloc:
10763  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10764    AS = LangAS::Default;
10765    return true;
10766  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10767    llvm_unreachable("Expected predefined allocator for the variables with the "
10768                     "static storage.");
10769  }
10770  return false;
10771}
10772
/// Returns whether a '#pragma omp requires unified_shared_memory' clause was
/// seen while processing 'requires' directives in this translation unit.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10776
10777CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10778    CodeGenModule &CGM)
10779    : CGM(CGM) {
10780  if (CGM.getLangOpts().OpenMPIsDevice) {
10781    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10782    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10783  }
10784}
10785
10786CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10787  if (CGM.getLangOpts().OpenMPIsDevice)
10788    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10789}
10790
10791bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10792  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10793    return true;
10794
10795  const auto *D = cast<FunctionDecl>(GD.getDecl());
10796  // Do not to emit function if it is marked as declare target as it was already
10797  // emitted.
10798  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10799    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10800      if (auto *F = dyn_cast_or_null<llvm::Function>(
10801              CGM.GetGlobalValue(CGM.getMangledName(GD))))
10802        return !F->isDeclaration();
10803      return false;
10804    }
10805    return true;
10806  }
10807
10808  return !AlreadyEmittedTargetDecls.insert(D).second;
10809}
10810
10811llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10812  // If we don't have entries or if we are emitting code for the device, we
10813  // don't need to do anything.
10814  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10815      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10816      (OffloadEntriesInfoManager.empty() &&
10817       !HasEmittedDeclareTargetRegion &&
10818       !HasEmittedTargetRegion))
10819    return nullptr;
10820
10821  // Create and register the function that handles the requires directives.
10822  ASTContext &C = CGM.getContext();
10823
10824  llvm::Function *RequiresRegFn;
10825  {
10826    CodeGenFunction CGF(CGM);
10827    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10828    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10829    std::string ReqName = getName({"omp_offloading", "requires_reg"});
10830    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10831    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10832    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10833    // TODO: check for other requires clauses.
10834    // The requires directive takes effect only when a target region is
10835    // present in the compilation unit. Otherwise it is ignored and not
10836    // passed to the runtime. This avoids the runtime from throwing an error
10837    // for mismatching requires clauses across compilation units that don't
10838    // contain at least 1 target region.
10839    assert((HasEmittedTargetRegion ||
10840            HasEmittedDeclareTargetRegion ||
10841            !OffloadEntriesInfoManager.empty()) &&
10842           "Target or declare target region expected.");
10843    if (HasRequiresUnifiedSharedMemory)
10844      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10845    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10846                            CGM.getModule(), OMPRTL___tgt_register_requires),
10847                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10848    CGF.FinishFunction();
10849  }
10850  return RequiresRegFn;
10851}
10852
10853void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10854                                    const OMPExecutableDirective &D,
10855                                    SourceLocation Loc,
10856                                    llvm::Function *OutlinedFn,
10857                                    ArrayRef<llvm::Value *> CapturedVars) {
10858  if (!CGF.HaveInsertPoint())
10859    return;
10860
10861  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10862  CodeGenFunction::RunCleanupsScope Scope(CGF);
10863
10864  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10865  llvm::Value *Args[] = {
10866      RTLoc,
10867      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10868      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10869  llvm::SmallVector<llvm::Value *, 16> RealArgs;
10870  RealArgs.append(std::begin(Args), std::end(Args));
10871  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10872
10873  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10874      CGM.getModule(), OMPRTL___kmpc_fork_teams);
10875  CGF.EmitRuntimeCall(RTLFn, RealArgs);
10876}
10877
10878void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10879                                         const Expr *NumTeams,
10880                                         const Expr *ThreadLimit,
10881                                         SourceLocation Loc) {
10882  if (!CGF.HaveInsertPoint())
10883    return;
10884
10885  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10886
10887  llvm::Value *NumTeamsVal =
10888      NumTeams
10889          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10890                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10891          : CGF.Builder.getInt32(0);
10892
10893  llvm::Value *ThreadLimitVal =
10894      ThreadLimit
10895          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10896                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10897          : CGF.Builder.getInt32(0);
10898
10899  // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10900  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10901                                     ThreadLimitVal};
10902  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10903                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10904                      PushNumTeamsArgs);
10905}
10906
/// Emits the runtime calls that open and close a 'target data' region:
/// __tgt_target_data_begin_mapper before the body and
/// __tgt_target_data_end_mapper after it, honoring the 'if' and 'device'
/// clauses.  \p CodeGen emits the region body; when device-pointer
/// privatization is needed the body is emitted twice (with and without
/// privatization), otherwise it is emitted once between the two calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Re-derive the array arguments from Info (filled by BeginThenGen above).
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11060
/// Emits the single runtime call implementing a standalone target data
/// directive ('target enter data', 'target exit data' or 'target update'),
/// selecting the matching (possibly nowait) begin/end/update mapper entry
/// point and honoring the 'if', 'device', 'depend' and 'nowait' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below; captured by reference in ThenGen so the
  // runtime-call emission sees the populated values.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function and must never reach here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend'/'nowait' clauses require the runtime call to be emitted inside
    // an (implicit) task-based directive.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the generated arrays to the captures ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause, nothing is emitted when the condition is false.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11238
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Parameter classification; defaults to Vector (maps-to-vector).
    ParamKindTy Kind = Vector;
    // Linear step (printed after 'l'/'s' in the mangled name; a value of 1
    // is omitted for Linear).  For LinearWithVarStride this is presumably
    // the referenced stride argument — confirm against the clause lowering.
    llvm::APSInt StrideOrArg;
    // Alignment printed as 'a<N>' in the mangled name when non-zero;
    // presumably from the 'aligned' clause.
    llvm::APSInt Alignment;
  };
} // namespace
11249
11250static unsigned evaluateCDTSize(const FunctionDecl *FD,
11251                                ArrayRef<ParamAttrTy> ParamAttrs) {
11252  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11253  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11254  // of that clause. The VLEN value must be power of 2.
11255  // In other case the notion of the function`s "characteristic data type" (CDT)
11256  // is used to compute the vector length.
11257  // CDT is defined in the following order:
11258  //   a) For non-void function, the CDT is the return type.
11259  //   b) If the function has any non-uniform, non-linear parameters, then the
11260  //   CDT is the type of the first such parameter.
11261  //   c) If the CDT determined by a) or b) above is struct, union, or class
11262  //   type which is pass-by-value (except for the type that maps to the
11263  //   built-in complex data type), the characteristic data type is int.
11264  //   d) If none of the above three cases is applicable, the CDT is int.
11265  // The VLEN is then determined based on the CDT and the size of vector
11266  // register of that ISA for which current vector version is generated. The
11267  // VLEN is computed using the formula below:
11268  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11269  // where vector register size specified in section 3.2.1 Registers and the
11270  // Stack Frame of original AMD64 ABI document.
11271  QualType RetType = FD->getReturnType();
11272  if (RetType.isNull())
11273    return 0;
11274  ASTContext &C = FD->getASTContext();
11275  QualType CDT;
11276  if (!RetType.isNull() && !RetType->isVoidType()) {
11277    CDT = RetType;
11278  } else {
11279    unsigned Offset = 0;
11280    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11281      if (ParamAttrs[Offset].Kind == Vector)
11282        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11283      ++Offset;
11284    }
11285    if (CDT.isNull()) {
11286      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11287        if (ParamAttrs[I + Offset].Kind == Vector) {
11288          CDT = FD->getParamDecl(I)->getType();
11289          break;
11290        }
11291      }
11292    }
11293  }
11294  if (CDT.isNull())
11295    CDT = C.IntTy;
11296  CDT = CDT->getCanonicalTypeUnqualified();
11297  if (CDT->isRecordType() || CDT->isUnionType())
11298    CDT = C.IntTy;
11299  return C.getTypeSize(CDT);
11300}
11301
11302static void
11303emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11304                           const llvm::APSInt &VLENVal,
11305                           ArrayRef<ParamAttrTy> ParamAttrs,
11306                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
11307  struct ISADataTy {
11308    char ISA;
11309    unsigned VecRegSize;
11310  };
11311  ISADataTy ISAData[] = {
11312      {
11313          'b', 128
11314      }, // SSE
11315      {
11316          'c', 256
11317      }, // AVX
11318      {
11319          'd', 256
11320      }, // AVX2
11321      {
11322          'e', 512
11323      }, // AVX512
11324  };
11325  llvm::SmallVector<char, 2> Masked;
11326  switch (State) {
11327  case OMPDeclareSimdDeclAttr::BS_Undefined:
11328    Masked.push_back('N');
11329    Masked.push_back('M');
11330    break;
11331  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11332    Masked.push_back('N');
11333    break;
11334  case OMPDeclareSimdDeclAttr::BS_Inbranch:
11335    Masked.push_back('M');
11336    break;
11337  }
11338  for (char Mask : Masked) {
11339    for (const ISADataTy &Data : ISAData) {
11340      SmallString<256> Buffer;
11341      llvm::raw_svector_ostream Out(Buffer);
11342      Out << "_ZGV" << Data.ISA << Mask;
11343      if (!VLENVal) {
11344        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11345        assert(NumElts && "Non-zero simdlen/cdtsize expected");
11346        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11347      } else {
11348        Out << VLENVal;
11349      }
11350      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11351        switch (ParamAttr.Kind){
11352        case LinearWithVarStride:
11353          Out << 's' << ParamAttr.StrideOrArg;
11354          break;
11355        case Linear:
11356          Out << 'l';
11357          if (ParamAttr.StrideOrArg != 1)
11358            Out << ParamAttr.StrideOrArg;
11359          break;
11360        case Uniform:
11361          Out << 'u';
11362          break;
11363        case Vector:
11364          Out << 'v';
11365          break;
11366        }
11367        if (!!ParamAttr.Alignment)
11368          Out << 'a' << ParamAttr.Alignment;
11369      }
11370      Out << '_' << Fn->getName();
11371      Fn->addFnAttr(Out.str());
11372    }
11373  }
11374}
11375
11376// This are the Functions that are needed to mangle the name of the
11377// vector functions generated by the compiler, according to the rules
11378// defined in the "Vector Function ABI specifications for AArch64",
11379// available at
11380// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11381
11382/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11383///
11384/// TODO: Need to implement the behavior for reference marked with a
11385/// var or no linear modifiers (1.b in the section). For this, we
11386/// need to extend ParamKindTy to support the linear modifiers.
11387static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11388  QT = QT.getCanonicalType();
11389
11390  if (QT->isVoidType())
11391    return false;
11392
11393  if (Kind == ParamKindTy::Uniform)
11394    return false;
11395
11396  if (Kind == ParamKindTy::Linear)
11397    return false;
11398
11399  // TODO: Handle linear references with modifiers
11400
11401  if (Kind == ParamKindTy::LinearWithVarStride)
11402    return false;
11403
11404  return true;
11405}
11406
11407/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11408static bool getAArch64PBV(QualType QT, ASTContext &C) {
11409  QT = QT.getCanonicalType();
11410  unsigned Size = C.getTypeSize(QT);
11411
11412  // Only scalars and complex within 16 bytes wide set PVB to true.
11413  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11414    return false;
11415
11416  if (QT->isFloatingType())
11417    return true;
11418
11419  if (QT->isIntegerType())
11420    return true;
11421
11422  if (QT->isPointerType())
11423    return true;
11424
11425  // TODO: Add support for complex types (section 3.1.2, item 2).
11426
11427  return false;
11428}
11429
11430/// Computes the lane size (LS) of a return type or of an input parameter,
11431/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11432/// TODO: Add support for references, section 3.2.1, item 1.
11433static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11434  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11435    QualType PTy = QT.getCanonicalType()->getPointeeType();
11436    if (getAArch64PBV(PTy, C))
11437      return C.getTypeSize(PTy);
11438  }
11439  if (getAArch64PBV(QT, C))
11440    return C.getTypeSize(QT);
11441
11442  return C.getTypeSize(C.getUIntPtrType());
11443}
11444
11445// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11446// signature of the scalar function, as defined in 3.2.2 of the
11447// AAVFABI.
11448static std::tuple<unsigned, unsigned, bool>
11449getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11450  QualType RetType = FD->getReturnType().getCanonicalType();
11451
11452  ASTContext &C = FD->getASTContext();
11453
11454  bool OutputBecomesInput = false;
11455
11456  llvm::SmallVector<unsigned, 8> Sizes;
11457  if (!RetType->isVoidType()) {
11458    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11459    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11460      OutputBecomesInput = true;
11461  }
11462  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11463    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11464    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11465  }
11466
11467  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11468  // The LS of a function parameter / return value can only be a power
11469  // of 2, starting from 8 bits, up to 128.
11470  assert(std::all_of(Sizes.begin(), Sizes.end(),
11471                     [](unsigned Size) {
11472                       return Size == 8 || Size == 16 || Size == 32 ||
11473                              Size == 64 || Size == 128;
11474                     }) &&
11475         "Invalid size");
11476
11477  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11478                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
11479                         OutputBecomesInput);
11480}
11481
11482/// Mangle the parameter part of the vector function name according to
11483/// their OpenMP classification. The mangling function is defined in
11484/// section 3.5 of the AAVFABI.
11485static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11486  SmallString<256> Buffer;
11487  llvm::raw_svector_ostream Out(Buffer);
11488  for (const auto &ParamAttr : ParamAttrs) {
11489    switch (ParamAttr.Kind) {
11490    case LinearWithVarStride:
11491      Out << "ls" << ParamAttr.StrideOrArg;
11492      break;
11493    case Linear:
11494      Out << 'l';
11495      // Don't print the step value if it is not present or if it is
11496      // equal to 1.
11497      if (ParamAttr.StrideOrArg != 1)
11498        Out << ParamAttr.StrideOrArg;
11499      break;
11500    case Uniform:
11501      Out << 'u';
11502      break;
11503    case Vector:
11504      Out << 'v';
11505      break;
11506    }
11507
11508    if (!!ParamAttr.Alignment)
11509      Out << 'a' << ParamAttr.Alignment;
11510  }
11511
11512  return std::string(Out.str());
11513}
11514
11515// Function used to add the attribute. The parameter `VLEN` is
11516// templated to allow the use of "x" when targeting scalable functions
11517// for SVE.
11518template <typename T>
11519static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11520                                 char ISA, StringRef ParSeq,
11521                                 StringRef MangledName, bool OutputBecomesInput,
11522                                 llvm::Function *Fn) {
11523  SmallString<256> Buffer;
11524  llvm::raw_svector_ostream Out(Buffer);
11525  Out << Prefix << ISA << LMask << VLEN;
11526  if (OutputBecomesInput)
11527    Out << "v";
11528  Out << ParSeq << "_" << MangledName;
11529  Fn->addFnAttr(Out.str());
11530}
11531
11532// Helper function to generate the Advanced SIMD names depending on
11533// the value of the NDS when simdlen is not present.
11534static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11535                                      StringRef Prefix, char ISA,
11536                                      StringRef ParSeq, StringRef MangledName,
11537                                      bool OutputBecomesInput,
11538                                      llvm::Function *Fn) {
11539  switch (NDS) {
11540  case 8:
11541    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11542                         OutputBecomesInput, Fn);
11543    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11544                         OutputBecomesInput, Fn);
11545    break;
11546  case 16:
11547    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11548                         OutputBecomesInput, Fn);
11549    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11550                         OutputBecomesInput, Fn);
11551    break;
11552  case 32:
11553    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11554                         OutputBecomesInput, Fn);
11555    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11556                         OutputBecomesInput, Fn);
11557    break;
11558  case 64:
11559  case 128:
11560    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11561                         OutputBecomesInput, Fn);
11562    break;
11563  default:
11564    llvm_unreachable("Scalar type is too wide.");
11565  }
11566}
11567
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates the user-provided `simdlen` value (\p UserVLEN, 0 when the
/// clause is absent) and attaches one or more "_ZGV"-mangled vector-variant
/// names to \p Fn for the given ISA ('n' = Advanced SIMD, 's' = SVE).
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature: narrowest data size
  // (NDS), widest data size (WDS), and whether the return value makes the
  // output behave as an additional input parameter.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width in [128, 2048], in multiples of 128.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence (section 3.5 mangling of each parameter).
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause: 'N' = unmasked, 'M' = masked.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`; lengths are derived from the NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11676
11677void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11678                                              llvm::Function *Fn) {
11679  ASTContext &C = CGM.getContext();
11680  FD = FD->getMostRecentDecl();
11681  // Map params to their positions in function decl.
11682  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11683  if (isa<CXXMethodDecl>(FD))
11684    ParamPositions.try_emplace(FD, 0);
11685  unsigned ParamPos = ParamPositions.size();
11686  for (const ParmVarDecl *P : FD->parameters()) {
11687    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11688    ++ParamPos;
11689  }
11690  while (FD) {
11691    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11692      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11693      // Mark uniform parameters.
11694      for (const Expr *E : Attr->uniforms()) {
11695        E = E->IgnoreParenImpCasts();
11696        unsigned Pos;
11697        if (isa<CXXThisExpr>(E)) {
11698          Pos = ParamPositions[FD];
11699        } else {
11700          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11701                                ->getCanonicalDecl();
11702          Pos = ParamPositions[PVD];
11703        }
11704        ParamAttrs[Pos].Kind = Uniform;
11705      }
11706      // Get alignment info.
11707      auto NI = Attr->alignments_begin();
11708      for (const Expr *E : Attr->aligneds()) {
11709        E = E->IgnoreParenImpCasts();
11710        unsigned Pos;
11711        QualType ParmTy;
11712        if (isa<CXXThisExpr>(E)) {
11713          Pos = ParamPositions[FD];
11714          ParmTy = E->getType();
11715        } else {
11716          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11717                                ->getCanonicalDecl();
11718          Pos = ParamPositions[PVD];
11719          ParmTy = PVD->getType();
11720        }
11721        ParamAttrs[Pos].Alignment =
11722            (*NI)
11723                ? (*NI)->EvaluateKnownConstInt(C)
11724                : llvm::APSInt::getUnsigned(
11725                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11726                          .getQuantity());
11727        ++NI;
11728      }
11729      // Mark linear parameters.
11730      auto SI = Attr->steps_begin();
11731      auto MI = Attr->modifiers_begin();
11732      for (const Expr *E : Attr->linears()) {
11733        E = E->IgnoreParenImpCasts();
11734        unsigned Pos;
11735        // Rescaling factor needed to compute the linear parameter
11736        // value in the mangled name.
11737        unsigned PtrRescalingFactor = 1;
11738        if (isa<CXXThisExpr>(E)) {
11739          Pos = ParamPositions[FD];
11740        } else {
11741          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11742                                ->getCanonicalDecl();
11743          Pos = ParamPositions[PVD];
11744          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11745            PtrRescalingFactor = CGM.getContext()
11746                                     .getTypeSizeInChars(P->getPointeeType())
11747                                     .getQuantity();
11748        }
11749        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11750        ParamAttr.Kind = Linear;
11751        // Assuming a stride of 1, for `linear` without modifiers.
11752        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11753        if (*SI) {
11754          Expr::EvalResult Result;
11755          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11756            if (const auto *DRE =
11757                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11758              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11759                ParamAttr.Kind = LinearWithVarStride;
11760                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11761                    ParamPositions[StridePVD->getCanonicalDecl()]);
11762              }
11763            }
11764          } else {
11765            ParamAttr.StrideOrArg = Result.Val.getInt();
11766          }
11767        }
11768        // If we are using a linear clause on a pointer, we need to
11769        // rescale the value of linear_step with the byte size of the
11770        // pointee type.
11771        if (Linear == ParamAttr.Kind)
11772          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11773        ++SI;
11774        ++MI;
11775      }
11776      llvm::APSInt VLENVal;
11777      SourceLocation ExprLoc;
11778      const Expr *VLENExpr = Attr->getSimdlen();
11779      if (VLENExpr) {
11780        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11781        ExprLoc = VLENExpr->getExprLoc();
11782      }
11783      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11784      if (CGM.getTriple().isX86()) {
11785        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11786      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11787        unsigned VLEN = VLENVal.getExtValue();
11788        StringRef MangledName = Fn->getName();
11789        if (CGM.getTarget().hasFeature("sve"))
11790          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11791                                         MangledName, 's', 128, Fn, ExprLoc);
11792        if (CGM.getTarget().hasFeature("neon"))
11793          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11794                                         MangledName, 'n', 128, Fn, ExprLoc);
11795      }
11796    }
11797    FD = FD->getPreviousDecl();
11798  }
11799}
11800
11801namespace {
11802/// Cleanup action for doacross support.
11803class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11804public:
11805  static const int DoacrossFinArgs = 2;
11806
11807private:
11808  llvm::FunctionCallee RTLFn;
11809  llvm::Value *Args[DoacrossFinArgs];
11810
11811public:
11812  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11813                    ArrayRef<llvm::Value *> CallArgs)
11814      : RTLFn(RTLFn) {
11815    assert(CallArgs.size() == DoacrossFinArgs);
11816    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11817  }
11818  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11819    if (!CGF.HaveInsertPoint())
11820      return;
11821    CGF.EmitRuntimeCall(RTLFn, Args);
11822  }
11823};
11824} // namespace
11825
// Emits the __kmpc_doacross_init call that describes every dimension of the
// doacross loop nest (lower bound, upper bound, stride, each as kmp_int64)
// and registers a cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (or reuse the cached) kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Allocate one kmp_dim per dimension and zero-initialize the whole array;
  // the lower bound 'lo' intentionally stays 0.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Push a cleanup so __kmpc_doacross_fini(loc, gtid) runs on every exit
  // path from the region, including exceptional unwinding.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11896
11897void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11898                                          const OMPDependClause *C) {
11899  QualType Int64Ty =
11900      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11901  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11902  QualType ArrayTy = CGM.getContext().getConstantArrayType(
11903      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11904  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11905  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11906    const Expr *CounterVal = C->getLoopData(I);
11907    assert(CounterVal);
11908    llvm::Value *CntVal = CGF.EmitScalarConversion(
11909        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11910        CounterVal->getExprLoc());
11911    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11912                          /*Volatile=*/false, Int64Ty);
11913  }
11914  llvm::Value *Args[] = {
11915      emitUpdateLocation(CGF, C->getBeginLoc()),
11916      getThreadID(CGF, C->getBeginLoc()),
11917      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11918  llvm::FunctionCallee RTLFn;
11919  if (C->getDependencyKind() == OMPC_DEPEND_source) {
11920    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11921                                                  OMPRTL___kmpc_doacross_post);
11922  } else {
11923    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11924    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11925                                                  OMPRTL___kmpc_doacross_wait);
11926  }
11927  CGF.EmitRuntimeCall(RTLFn, Args);
11928}
11929
11930void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11931                               llvm::FunctionCallee Callee,
11932                               ArrayRef<llvm::Value *> Args) const {
11933  assert(Loc.isValid() && "Outlined function call location must be valid.");
11934  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11935
11936  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11937    if (Fn->doesNotThrow()) {
11938      CGF.EmitNounwindRuntimeCall(Fn, Args);
11939      return;
11940    }
11941  }
11942  CGF.EmitRuntimeCall(Callee, Args);
11943}
11944
// Emits a call to an outlined OpenMP region function. The base implementation
// simply forwards to emitCall; device runtimes may override to adjust the
// calling convention or argument handling.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11950
11951void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11952  if (const auto *FD = dyn_cast<FunctionDecl>(D))
11953    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11954      HasEmittedDeclareTargetRegion = true;
11955}
11956
// Returns the address to use for \p NativeParam inside an outlined function.
// In the base (host) implementation native and target representations match,
// so the native parameter's local address is returned directly; \p
// TargetParam is only meaningful for device subclasses that remap parameters.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11962
// Returns the address to use for local variable \p VD, taking into account
// untied-task duplication and the 'omp allocate' directive. For allocatable
// declarations this emits a __kmpc_alloc call and registers a matching
// __kmpc_free cleanup; otherwise the untied-task address (possibly invalid)
// is returned and the default allocation is used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the addresses
  // recorded for this variable in the task's local-vars frame.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: size is a runtime value that must be rounded up.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-size type: round up to the declaration's alignment.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Emit: <name>.void.addr = __kmpc_alloc(gtid, size, allocator)
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, store the allocated pointer into the task-private
    // slot so later task parts can reload it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support: emits the matching __kmpc_free.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    // In an untied task, emit the task switch point after the allocation.
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12065
12066bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12067                                             const VarDecl *VD) const {
12068  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12069  if (It == FunctionToUntiedTaskStackMap.end())
12070    return false;
12071  return UntiedLocalVarsStack[It->second].count(VD) > 0;
12072}
12073
12074CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12075    CodeGenModule &CGM, const OMPLoopDirective &S)
12076    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12077  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12078  if (!NeedToPush)
12079    return;
12080  NontemporalDeclsSet &DS =
12081      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12082  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12083    for (const Stmt *Ref : C->private_refs()) {
12084      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12085      const ValueDecl *VD;
12086      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12087        VD = DRE->getDecl();
12088      } else {
12089        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12090        assert((ME->isImplicitCXXThis() ||
12091                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12092               "Expected member of current class.");
12093        VD = ME->getMemberDecl();
12094      }
12095      DS.insert(VD);
12096    }
12097  }
12098}
12099
12100CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12101  if (!NeedToPush)
12102    return;
12103  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12104}
12105
12106CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12107    CodeGenFunction &CGF,
12108    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12109                          std::pair<Address, Address>> &LocalVars)
12110    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12111  if (!NeedToPush)
12112    return;
12113  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12114      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12115  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12116}
12117
12118CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12119  if (!NeedToPush)
12120    return;
12121  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12122}
12123
12124bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12125  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12126
12127  return llvm::any_of(
12128      CGM.getOpenMPRuntime().NontemporalDeclsStack,
12129      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12130}
12131
12132void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12133    const OMPExecutableDirective &S,
12134    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12135    const {
12136  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12137  // Vars in target/task regions must be excluded completely.
12138  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12139      isOpenMPTaskingDirective(S.getDirectiveKind())) {
12140    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12141    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12142    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12143    for (const CapturedStmt::Capture &Cap : CS->captures()) {
12144      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12145        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12146    }
12147  }
12148  // Exclude vars in private clauses.
12149  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12150    for (const Expr *Ref : C->varlists()) {
12151      if (!Ref->getType()->isScalarType())
12152        continue;
12153      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12154      if (!DRE)
12155        continue;
12156      NeedToCheckForLPCs.insert(DRE->getDecl());
12157    }
12158  }
12159  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12160    for (const Expr *Ref : C->varlists()) {
12161      if (!Ref->getType()->isScalarType())
12162        continue;
12163      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12164      if (!DRE)
12165        continue;
12166      NeedToCheckForLPCs.insert(DRE->getDecl());
12167    }
12168  }
12169  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12170    for (const Expr *Ref : C->varlists()) {
12171      if (!Ref->getType()->isScalarType())
12172        continue;
12173      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12174      if (!DRE)
12175        continue;
12176      NeedToCheckForLPCs.insert(DRE->getDecl());
12177    }
12178  }
12179  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12180    for (const Expr *Ref : C->varlists()) {
12181      if (!Ref->getType()->isScalarType())
12182        continue;
12183      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12184      if (!DRE)
12185        continue;
12186      NeedToCheckForLPCs.insert(DRE->getDecl());
12187    }
12188  }
12189  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12190    for (const Expr *Ref : C->varlists()) {
12191      if (!Ref->getType()->isScalarType())
12192        continue;
12193      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12194      if (!DRE)
12195        continue;
12196      NeedToCheckForLPCs.insert(DRE->getDecl());
12197    }
12198  }
12199  for (const Decl *VD : NeedToCheckForLPCs) {
12200    for (const LastprivateConditionalData &Data :
12201         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12202      if (Data.DeclToUniqueName.count(VD) > 0) {
12203        if (!Data.Disabled)
12204          NeedToAddForLPCsAsDisabled.insert(VD);
12205        break;
12206      }
12207    }
12208  }
12209}
12210
// Pushes a lastprivate-conditional tracking frame for region \p S when it has
// at least one 'lastprivate(conditional: ...)' clause (OpenMP >= 5.0). The
// frame maps each listed declaration to the unique name of its tracking
// variable and records the IV lvalue and the emitting function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when OpenMP 5.0+ and some lastprivate clause uses the
      // 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Associate each conditional-lastprivate decl with the unique name used
    // for its "pl_cond" tracking variable.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12242
// RAII constructor used for regions in which the lastprivate conditional
// analysis must be suppressed (see tryToDisableInnerAnalysis): if any
// variables need to be excluded, pushes a record marked 'Disabled' listing
// them; lookups from nested code that hit such a record bail out instead of
// emitting a conditional update.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Lastprivate conditional is an OpenMP 5.0 feature - nothing to do before.
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // The unique names are irrelevant for disabled entries; only the keys
    // (the declarations) are consulted during lookup.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12261
/// Factory for the 'disable' form of the RAII: temporarily suppresses
/// lastprivate conditional handling while in the region \p S. Delegates to
/// the two-argument constructor, which pushes a 'Disabled' stack entry only
/// if the analysis actually needs to be turned off.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12267
12268CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12269  if (CGM.getLangOpts().OpenMP < 50)
12270    return;
12271  if (Action == ActionToDo::DisableLastprivateConditional) {
12272    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12273           "Expected list of disabled private vars.");
12274    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12275  }
12276  if (Action == ActionToDo::PushAsLastprivateConditional) {
12277    assert(
12278        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12279        "Expected list of lastprivate conditional vars.");
12280    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12281  }
12282}
12283
12284Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12285                                                        const VarDecl *VD) {
12286  ASTContext &C = CGM.getContext();
12287  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12288  if (I == LastprivateConditionalToTypes.end())
12289    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12290  QualType NewType;
12291  const FieldDecl *VDField;
12292  const FieldDecl *FiredField;
12293  LValue BaseLVal;
12294  auto VI = I->getSecond().find(VD);
12295  if (VI == I->getSecond().end()) {
12296    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12297    RD->startDefinition();
12298    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12299    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12300    RD->completeDefinition();
12301    NewType = C.getRecordType(RD);
12302    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12303    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12304    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12305  } else {
12306    NewType = std::get<0>(VI->getSecond());
12307    VDField = std::get<1>(VI->getSecond());
12308    FiredField = std::get<2>(VI->getSecond());
12309    BaseLVal = std::get<3>(VI->getSecond());
12310  }
12311  LValue FiredLVal =
12312      CGF.EmitLValueForField(BaseLVal, FiredField);
12313  CGF.EmitStoreOfScalar(
12314      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12315      FiredLVal);
12316  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12317}
12318
12319namespace {
12320/// Checks if the lastprivate conditional variable is referenced in LHS.
12321class LastprivateConditionalRefChecker final
12322    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12323  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12324  const Expr *FoundE = nullptr;
12325  const Decl *FoundD = nullptr;
12326  StringRef UniqueDeclName;
12327  LValue IVLVal;
12328  llvm::Function *FoundFn = nullptr;
12329  SourceLocation Loc;
12330
12331public:
12332  bool VisitDeclRefExpr(const DeclRefExpr *E) {
12333    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12334         llvm::reverse(LPM)) {
12335      auto It = D.DeclToUniqueName.find(E->getDecl());
12336      if (It == D.DeclToUniqueName.end())
12337        continue;
12338      if (D.Disabled)
12339        return false;
12340      FoundE = E;
12341      FoundD = E->getDecl()->getCanonicalDecl();
12342      UniqueDeclName = It->second;
12343      IVLVal = D.IVLVal;
12344      FoundFn = D.Fn;
12345      break;
12346    }
12347    return FoundE == E;
12348  }
12349  bool VisitMemberExpr(const MemberExpr *E) {
12350    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12351      return false;
12352    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12353         llvm::reverse(LPM)) {
12354      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12355      if (It == D.DeclToUniqueName.end())
12356        continue;
12357      if (D.Disabled)
12358        return false;
12359      FoundE = E;
12360      FoundD = E->getMemberDecl()->getCanonicalDecl();
12361      UniqueDeclName = It->second;
12362      IVLVal = D.IVLVal;
12363      FoundFn = D.Fn;
12364      break;
12365    }
12366    return FoundE == E;
12367  }
12368  bool VisitStmt(const Stmt *S) {
12369    for (const Stmt *Child : S->children()) {
12370      if (!Child)
12371        continue;
12372      if (const auto *E = dyn_cast<Expr>(Child))
12373        if (!E->isGLValue())
12374          continue;
12375      if (Visit(Child))
12376        return true;
12377    }
12378    return false;
12379  }
12380  explicit LastprivateConditionalRefChecker(
12381      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12382      : LPM(LPM) {}
12383  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12384  getFoundData() const {
12385    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12386  }
12387};
12388} // namespace
12389
/// Emits the conditional update of the internal global copy of a
/// lastprivate(conditional:) variable:
///
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
///
/// 'last_iv' and 'last_a' are internal globals created on demand, so after
/// the loop the global holds the value stored by the update with the
/// greatest iteration number. In -fopenmp-simd mode the critical section is
/// skipped since no parallel regions can be emitted there.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      // Pick the unsigned comparison when the IV type is unsigned.
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12476
/// Called for a store whose LHS may name a lastprivate conditional variable
/// (OpenMP >= 5.0). If \p LHS references such a variable:
/// - in the function that owns the construct, emits the conditional update
///   of the internal global copy (emitLastprivateConditionalUpdate);
/// - in a nested (outlined) function, only marks the variable as modified
///   by atomically setting the 'Fired' flag of the wrapper struct created
///   by emitLastprivateConditionalInit; the owning function later inspects
///   the flag in checkAndEmitSharedLastprivateConditional.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // The private copy's address is the address of the wrapper record's
    // first field, so casting it back to the record type gives access to
    // the 'Fired' flag.
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Store Fired = 1 as an unordered atomic, volatile store so concurrent
    // threads in the inner region can set it safely.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12519
12520void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12521    CodeGenFunction &CGF, const OMPExecutableDirective &D,
12522    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12523  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12524    return;
12525  auto Range = llvm::reverse(LastprivateConditionalStack);
12526  auto It = llvm::find_if(
12527      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12528  if (It == Range.end() || It->Fn != CGF.CurFn)
12529    return;
12530  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12531  assert(LPCI != LastprivateConditionalToTypes.end() &&
12532         "Lastprivates must be registered already.");
12533  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12534  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12535  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12536  for (const auto &Pair : It->DeclToUniqueName) {
12537    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12538    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12539      continue;
12540    auto I = LPCI->getSecond().find(Pair.first);
12541    assert(I != LPCI->getSecond().end() &&
12542           "Lastprivate must be rehistered already.");
12543    // bool Cmp = priv_a.Fired != 0;
12544    LValue BaseLVal = std::get<3>(I->getSecond());
12545    LValue FiredLVal =
12546        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12547    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12548    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12549    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12550    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12551    // if (Cmp) {
12552    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12553    CGF.EmitBlock(ThenBB);
12554    Address Addr = CGF.GetAddrOfLocalVar(VD);
12555    LValue LVal;
12556    if (VD->getType()->isReferenceType())
12557      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12558                                           AlignmentSource::Decl);
12559    else
12560      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12561                                AlignmentSource::Decl);
12562    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12563                                     D.getBeginLoc());
12564    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12565    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12566    // }
12567  }
12568}
12569
12570void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12571    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12572    SourceLocation Loc) {
12573  if (CGF.getLangOpts().OpenMP < 50)
12574    return;
12575  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12576  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12577         "Unknown lastprivate conditional variable.");
12578  StringRef UniqueName = It->second;
12579  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12580  // The variable was not updated in the region - exit.
12581  if (!GV)
12582    return;
12583  LValue LPLVal = CGF.MakeAddrLValue(
12584      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12585  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12586  CGF.EmitStoreOfScalar(Res, PrivLVal);
12587}
12588
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime entry points. In SIMD-only mode no libomp runtime
// calls may be generated, so every entry point below that would require the
// full runtime traps with llvm_unreachable ("Not supported in SIMD-only
// mode") - callers are expected never to reach them in this configuration.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12763
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  // Only the simple form of reduction can reach this point in SIMD-only
  // mode; it is handled by the generic base implementation without any
  // runtime library calls from this class.
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12772
// Task-reduction, cancellation and target-related entry points: none of
// these constructs are supported in SIMD-only mode (see the
// llvm_unreachable diagnostics below).

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12840
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // No offloading handling in SIMD-only mode; returning false presumably
  // lets the caller emit the global through the normal host path -
  // TODO(review): confirm against CodeGenModule's use of this hook.
  return false;
}
12844
// Teams, target-data, doacross and parameter-translation entry points:
// likewise unsupported in SIMD-only mode.

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12895