1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Carries the region kind, the directive kind, a cancellation flag and the
/// code generation callback used to emit the region body.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that outline a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions emitted inline, with no captured statement.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next switching point for an untied task; no-op by default.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the directive associated with this region may be cancelled.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (outlined parallel/task, inlined or target).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code of the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that created this region.
  OpenMPDirectiveKind Kind;
  /// True if a 'cancel' directive may apply to this region.
  bool HasCancel;
};
114
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'parallel' directives whose body is outlined into a
/// helper function that receives the global thread id as a parameter.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};
147
/// API for captured statement code generation in OpenMP constructs.
/// Used for standalone 'task' directives; additionally handles the part-id
/// based dispatch machinery required for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the resume machinery for untied tasks: a
  /// switch on the task part id whose cases jump to the recorded switching
  /// points, so a re-entered task continues where it left off.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: the constructor receives 'Tied').
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Codegen callback run between two task switching points.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id; cases are appended as switching points are
    /// emitted. Only valid after Enter() has run for an untied task.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination of the switch: leave the task.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Record the next switching point: store the new part id, run the
    /// between-parts codegen, return from the task, and add a switch case
    /// that resumes after this point on re-entry.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next case index doubles as the new part id value.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
236
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing outlined region
/// (if any); without one, the original declarations are used directly.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  /// Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE: unlike the other delegating methods, this consults the raw old
    // CGCapturedStmtInfo, which may be a non-OpenMP captured-stmt info.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this inlined region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
319
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};
348
/// Placeholder region codegen callback for expression-only regions; must
/// never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already usable as-is; only globals need
      // to be redirected through the private scope.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Delegate to the enclosing inlined region; fall back to the original
    // declaration (null) if it is not captured there.
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
410
/// RAII for emitting code of OpenMP constructs.
/// On construction installs a CGOpenMPInlinedRegionInfo as the current
/// captured-statement info (optionally hiding lambda/block capture state);
/// on destruction restores everything that was saved.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, swapped out when NoInheritance is set.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture, cleared when NoInheritance is set.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info, cleared when NoInheritance is set.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether lambda/block capture state is hidden inside the region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash the lambda/block capture state so the inlined region does not
      // see (or clobber) it; restored in the destructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
453
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// These values form part of the libomp ABI and must not be changed.
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The numeric values are part of the libomp
/// ABI and must match the runtime.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559  PrePostActionTy *Action;
560
561public:
562  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564    if (!CGF.HaveInsertPoint())
565      return;
566    Action->Exit(CGF);
567  }
568};
569
570} // anonymous namespace
571
/// Invoke the stored codegen callback inside a fresh cleanups scope.
/// If a pre/post action is attached, its Exit() is registered as a cleanup
/// before the callback runs so it fires on both normal and EH paths.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Push the exit action first so it runs when Scope unwinds.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No attached action: use a default (no-op) one.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589      if (const auto *DRE =
590              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592          return DRD;
593  return nullptr;
594}
595
/// Emit initialization of a reduction private copy.
/// If the user-defined reduction \p DRD has an initializer clause, emit the
/// initializer call with \p Private / \p Original privatized as its LHS/RHS;
/// otherwise zero-initialize \p Private from an emitted null constant.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Use the UDR initializer function (second element of the pair).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the initializer's formal LHS/RHS onto the private/original copies.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a zero constant and copy it in.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied via an lvalue opaque mapping; done here.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
649
/// Emit initialization of arrays of complex types.
/// Generates an element-by-element while-do loop over the destination array,
/// initializing each element either via the declare-reduction initializer or
/// via a plain expression emission.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit True to use the UDR initializer for each
///        element instead of \p Init.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array (used only with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (if any) and destination elements.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
738
/// Emit the lvalue of a shared reduction expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744                                            const Expr *E) {
745  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
746    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
747  return LValue();
748}
749
/// Emit initialization of the aggregate private copy for reduction clause N.
/// Chooses between the declare-reduction initializer and the private
/// variable's own copy-init expression.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one exists, or when the private variable
  // has no initializer of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
766
767ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
768                                   ArrayRef<const Expr *> Origs,
769                                   ArrayRef<const Expr *> Privates,
770                                   ArrayRef<const Expr *> ReductionOps) {
771  ClausesData.reserve(Shareds.size());
772  SharedAddresses.reserve(Shareds.size());
773  Sizes.reserve(Shareds.size());
774  BaseDecls.reserve(Shareds.size());
775  const auto *IOrig = Origs.begin();
776  const auto *IPriv = Privates.begin();
777  const auto *IRed = ReductionOps.begin();
778  for (const Expr *Ref : Shareds) {
779    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780    std::advance(IOrig, 1);
781    std::advance(IPriv, 1);
782    std::advance(IRed, 1);
783  }
784}
785
786void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
787  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788         "Number of generated lvalues must be exactly N.");
789  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791  SharedAddresses.emplace_back(First, Second);
792  if (ClausesData[N].Shared == ClausesData[N].Ref) {
793    OrigAddresses.emplace_back(First, Second);
794  } else {
795    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797    OrigAddresses.emplace_back(First, Second);
798  }
799}
800
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Record the size of reduction item N (in chars and, for variably modified
  // types, in elements) and, for VLA types, bind the size expression so the
  // private type can be emitted.
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: only the byte size is needed.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - lower bound) + 1, from the section's
    // two recorded lvalues.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Map the VLA size expression to the computed element count while the
  // variably modified private type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
834
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Re-emit the variably modified private type of item N with an externally
  // supplied element count. For non-VLA items, Size must be null and nothing
  // is done.
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size for the duration of the emission.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Initialize the private copy of reduction item N. The initializer comes
  // either from a user-defined reduction (DRD), from DefaultInit, or from the
  // private variable's own initializer.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items get element-wise aggregate initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item governed by a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
876
877bool ReductionCodeGen::needCleanups(unsigned N) {
878  QualType PrivateType = getPrivateType(N);
879  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
880  return DTorKind != QualType::DK_none;
881}
882
883void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
884                                    Address PrivateAddr) {
885  QualType PrivateType = getPrivateType(N);
886  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887  if (needCleanups(N)) {
888    PrivateAddr =
889        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891  }
892}
893
// Walk pointer/reference indirections from BaseLV (of type BaseTy) until the
// level whose type matches ElTy, loading through each pointer or reference,
// and return the resulting lvalue retyped to ElTy's memory representation
// while preserving the base info and TBAA of the walked lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: materialize an lvalue for it and load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
912
// Rebuild the chain of pointer indirections between BaseTy and ElTy around
// the raw pointer Addr: one memory temporary per pointer/reference level,
// each storing the address of the next, with Addr stored at the innermost
// level; the outermost temporary is returned. If no indirection is needed,
// Addr simply replaces the pointer of OriginalBaseAddress.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Chain each new temporary into the previous one; remember the outermost
    // temporary so it can be returned.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  // No indirection levels: reuse the original base address with the new
  // pointer value.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
941
942static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
943  const VarDecl *OrigVD = nullptr;
944  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
945    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
946    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
947      Base = TempOASE->getBase()->IgnoreParenImpCasts();
948    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
949      Base = TempASE->getBase()->IgnoreParenImpCasts();
950    DE = cast<DeclRefExpr>(Base);
951    OrigVD = cast<VarDecl>(DE->getDecl());
952  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
953    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
954    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
955      Base = TempASE->getBase()->IgnoreParenImpCasts();
956    DE = cast<DeclRefExpr>(Base);
957    OrigVD = cast<VarDecl>(DE->getDecl());
958  }
959  return OrigVD;
960}
961
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For an array-section/subscript reduction item, offset the private copy by
  // the distance between the shared section start and its base variable so
  // the private data can be addressed through the same base expression.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Walk indirections down to the level matching the shared item's type.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    // Distance (in elements) from the shared section start back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    // Apply the same offset to the private pointer.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
988
989bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
990  const OMPDeclareReductionDecl *DRD =
991      getReductionInit(ClausesData[N].ReductionOp);
992  return DRD && DRD->getInitializer();
993}
994
995LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
996  return CGF.EmitLoadOfPointerLValue(
997      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
998      getThreadIDVariable()->getType()->castAs<PointerType>());
999}
1000
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope is pushed so no exception can escape the structured
  // block while its body is emitted.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1015
1016LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1017    CodeGenFunction &CGF) {
1018  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1019                            getThreadIDVariable()->getType(),
1020                            AlignmentSource::Decl);
1021}
1022
1023static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1024                                       QualType FieldTy) {
1025  auto *Field = FieldDecl::Create(
1026      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1027      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1028      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1029  Field->setAccess(AS_public);
1030  DC->addDecl(Field);
1031  return Field;
1032}
1033
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // Critical-section names are arrays of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  // On the device, pre-populate offload metadata from the host IR file; on
  // the host an empty name is passed.
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1055
1056void CGOpenMPRuntime::clear() {
1057  InternalVars.clear();
1058  // Clean non-target variable declarations possibly used only in debug info.
1059  for (const auto &Data : EmittedNonTargetVariables) {
1060    if (!Data.getValue().pointsToAliveValue())
1061      continue;
1062    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1063    if (!GV)
1064      continue;
1065    if (!GV->isDeclaration() || GV->getNumUses() > 0)
1066      continue;
1067    GV->eraseFromParent();
1068  }
1069}
1070
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  // Join Parts using the platform-specific separator chosen by the builder.
  return OMPBuilder.createPlatformSpecificName(Parts);
}
1074
/// Emit an internal ".omp_combiner."/".omp_initializer." helper function for
/// a "#pragma omp declare reduction": the body evaluates CombinerInitializer
/// (if any) with the In/Out variables privatized onto the two restrict
/// pointer parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; make them always-inline when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  // For initializers, run the Out variable's own non-trivial initializer (if
  // any) before the explicit initializer expression.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1129
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit (once per declaration) the combiner and optional initializer
  // functions for a declare-reduction and cache them in UDRMap.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer is passed as the expression to emit;
    // otherwise the priv variable's own init is emitted inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which function emitted this UDR so the cache entry can be
    // dropped when that function finishes.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1155
1156std::pair<llvm::Function *, llvm::Function *>
1157CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1158  auto I = UDRMap.find(D);
1159  if (I != UDRMap.end())
1160    return I->second;
1161  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1162  return UDRMap.lookup(D);
1163}
1164
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for directive Kind onto the builder's
  // stack; the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; then this RAII is a no-op.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1209
// Emit the outlined function for a parallel or teams region: determine
// whether the directive supports cancellation, inform the OpenMPIRBuilder,
// and generate the captured statement as a function named OutlinedHelperName.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Query every directive kind that can carry a cancel inside its parallel
  // region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1246
1247std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1248  std::string Suffix = getName({"omp_outlined"});
1249  return (Name + Suffix).str();
1250}
1251
std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  // Derive the helper name from the function currently being emitted.
  return getOutlinedHelperName(CGF.CurFn->getName());
}
1255
1256std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1257  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1258  return (Name + Suffix).str();
1259}
1260
1261llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1262    CodeGenFunction &CGF, const OMPExecutableDirective &D,
1263    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1264    const RegionCodeGenTy &CodeGen) {
1265  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1266  return emitParallelOrTeamsOutlinedFunction(
1267      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1268      CodeGen);
1269}
1270
1271llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1272    CodeGenFunction &CGF, const OMPExecutableDirective &D,
1273    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1274    const RegionCodeGenTy &CodeGen) {
1275  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1276  return emitParallelOrTeamsOutlinedFunction(
1277      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1278      CodeGen);
1279}
1280
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, call __kmpc_omp_task with the task descriptor after a
  // part is emitted so the runtime gets the task back.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-family directives use the taskloop captured region; everything
  // else uses the task one.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Query every task-like directive kind that can carry a cancel.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only reported for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1327
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  // Create the placeholder instruction ("svcpt") marking where service calls
  // (e.g. the cached thread-id load) are inserted for CGF.CurFn.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef serves as a removable placeholder.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Default placement: right after the function's alloca insertion point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1343
1344void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1345  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1346  if (Elem.second.ServiceInsertPt) {
1347    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1348    Elem.second.ServiceInsertPt = nullptr;
1349    Ptr->eraseFromParent();
1350  }
1351}
1352
// Build the ";file;function;line;column;;" ident string for Loc into Buffer
// and return a StringRef into it (Buffer must outlive the returned ref).
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // Function name is emitted only when the current decl is a FunctionDecl.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1365
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  // Build an ident_t describing Loc for runtime calls. Without debug info
  // (unless EmitLoc forces emission) or for an invalid Loc, the default
  // source-location string is used instead of the real location.
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1390
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the argument is only safe when no landing pad can intervene,
      // or when the load happens in (or next to) the entry block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point (function entry), not
  // at the current insertion point.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1460
1461void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1462  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1464    clearLocThreadIdInsertPt(CGF);
1465    OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466  }
1467  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468    for(const auto *D : FunctionUDRMap[CGF.CurFn])
1469      UDRMap.erase(D);
1470    FunctionUDRMap.erase(CGF.CurFn);
1471  }
1472  auto I = FunctionUDMMap.find(CGF.CurFn);
1473  if (I != FunctionUDMMap.end()) {
1474    for(const auto *D : I->second)
1475      UDMMap.erase(D);
1476    FunctionUDMMap.erase(I);
1477  }
1478  LastprivateConditionalToTypes.erase(CGF.CurFn);
1479  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1480}
1481
/// Returns the pointer type for the OpenMP 'ident_t' source-location struct,
/// as cached by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1485
1486llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1487  if (!Kmpc_MicroTy) {
1488    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492  }
1493  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494}
1495
1496llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1497convertDeviceClause(const VarDecl *VD) {
1498  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499      OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500  if (!DevTy)
1501    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502
1503  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504  case OMPDeclareTargetDeclAttr::DT_Host:
1505    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506    break;
1507  case OMPDeclareTargetDeclAttr::DT_NoHost:
1508    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509    break;
1510  case OMPDeclareTargetDeclAttr::DT_Any:
1511    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512    break;
1513  default:
1514    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515    break;
1516  }
1517}
1518
1519llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1520convertCaptureClause(const VarDecl *VD) {
1521  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523  if (!MapType)
1524    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528    break;
1529  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531    break;
1532  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534    break;
1535  default:
1536    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537    break;
1538  }
1539}
1540
1541static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544
1545  auto FileInfoCallBack = [&]() {
1546    SourceManager &SM = CGM.getContext().getSourceManager();
1547    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548
1549    llvm::sys::fs::UniqueID ID;
1550    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1551      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552    }
1553
1554    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555  };
1556
1557  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558}
1559
/// Returns the address to use for a declare-target variable \p VD, delegating
/// to the OpenMPIRBuilder. Returns an invalid Address when the builder
/// produces no address for this variable.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Callbacks so the builder can lazily query the global's address and
  // linkage only when it actually needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // Out-parameter for reference globals the builder may create; the results
  // are not used further here.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}
1585
1586llvm::Constant *
1587CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1588  assert(!CGM.getLangOpts().OpenMPUseTLS ||
1589         !CGM.getContext().getTargetInfo().isTLSSupported());
1590  // Lookup the entry, lazily creating it if necessary.
1591  std::string Suffix = getName({"cache", ""});
1592  return OMPBuilder.getOrCreateInternalVariable(
1593      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594}
1595
1596Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1597                                                const VarDecl *VD,
1598                                                Address VDAddr,
1599                                                SourceLocation Loc) {
1600  if (CGM.getLangOpts().OpenMPUseTLS &&
1601      CGM.getContext().getTargetInfo().isTLSSupported())
1602    return VDAddr;
1603
1604  llvm::Type *VarTy = VDAddr.getElementType();
1605  llvm::Value *Args[] = {
1606      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1608      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1609      getOrCreateThreadPrivateCache(VD)};
1610  return Address(
1611      CGF.EmitRuntimeCall(
1612          OMPBuilder.getOrCreateRuntimeFunction(
1613              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614          Args),
1615      CGF.Int8Ty, VDAddr.getAlignment());
1616}
1617
1618void CGOpenMPRuntime::emitThreadPrivateVarInit(
1619    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622  // library.
1623  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626                      OMPLoc);
1627  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628  // to register constructor/destructor for variable.
1629  llvm::Value *Args[] = {
1630      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1631      Ctor, CopyCtor, Dtor};
1632  CGF.EmitRuntimeCall(
1633      OMPBuilder.getOrCreateRuntimeFunction(
1634          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1635      Args);
1636}
1637
/// Emits (at most once per mangled name) the constructor/destructor machinery
/// for a threadprivate variable definition and registers it with the runtime.
/// Returns the standalone init function when called outside a function
/// context (\p CGF == nullptr) and an init was required; otherwise returns
/// nullptr. With TLS-based threadprivate nothing is emitted.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // Only the defining declaration is processed, and only once per variable.
  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer, run the initializer into it, then
      // return the same pointer.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single parameter: void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No constructor generated: pass a typed null to the runtime.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No destructor generated: pass a typed null to the runtime.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a standalone global
      // init function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1757
1758void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1759                                                llvm::GlobalValue *GV) {
1760  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1761      OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1762
1763  // We only need to handle active 'indirect' declare target functions.
1764  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1765    return;
1766
1767  // Get a mangled name to store the new device global in.
1768  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1769      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1770  SmallString<128> Name;
1771  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1772
1773  // We need to generate a new global to hold the address of the indirectly
1774  // called device function. Doing this allows us to keep the visibility and
1775  // linkage of the associated function unchanged while allowing the runtime to
1776  // access its value.
1777  llvm::GlobalValue *Addr = GV;
1778  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1779    Addr = new llvm::GlobalVariable(
1780        CGM.getModule(), CGM.VoidPtrTy,
1781        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1782        nullptr, llvm::GlobalValue::NotThreadLocal,
1783        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1784    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1785  }
1786
1787  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1788      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1789      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1790      llvm::GlobalValue::WeakODRLinkage);
1791}
1792
1793Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1794                                                          QualType VarType,
1795                                                          StringRef Name) {
1796  std::string Suffix = getName({"artificial", ""});
1797  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1798  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1799      VarLVType, Twine(Name).concat(Suffix).str());
1800  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1801      CGM.getTarget().isTLSSupported()) {
1802    GAddr->setThreadLocal(/*Val=*/true);
1803    return Address(GAddr, GAddr->getValueType(),
1804                   CGM.getContext().getTypeAlignInChars(VarType));
1805  }
1806  std::string CacheSuffix = getName({"cache", ""});
1807  llvm::Value *Args[] = {
1808      emitUpdateLocation(CGF, SourceLocation()),
1809      getThreadID(CGF, SourceLocation()),
1810      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1811      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1812                                /*isSigned=*/false),
1813      OMPBuilder.getOrCreateInternalVariable(
1814          CGM.VoidPtrPtrTy,
1815          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1816  return Address(
1817      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1818          CGF.EmitRuntimeCall(
1819              OMPBuilder.getOrCreateRuntimeFunction(
1820                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1821              Args),
1822          VarLVType->getPointerTo(/*AddrSpace=*/0)),
1823      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1824}
1825
1826void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1827                                   const RegionCodeGenTy &ThenGen,
1828                                   const RegionCodeGenTy &ElseGen) {
1829  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1830
1831  // If the condition constant folds and can be elided, try to avoid emitting
1832  // the condition and the dead arm of the if/else.
1833  bool CondConstant;
1834  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1835    if (CondConstant)
1836      ThenGen(CGF);
1837    else
1838      ElseGen(CGF);
1839    return;
1840  }
1841
1842  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1843  // emit the conditional branch.
1844  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1845  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1846  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1847  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1848
1849  // Emit the 'then' code.
1850  CGF.EmitBlock(ThenBlock);
1851  ThenGen(CGF);
1852  CGF.EmitBranch(ContBlock);
1853  // Emit the 'else' code if present.
1854  // There is no need to emit line number for unconditional branch.
1855  (void)ApplyDebugLocation::CreateEmpty(CGF);
1856  CGF.EmitBlock(ElseBlock);
1857  ElseGen(CGF);
1858  // There is no need to emit line number for unconditional branch.
1859  (void)ApplyDebugLocation::CreateEmpty(CGF);
1860  CGF.EmitBranch(ContBlock);
1861  // Emit the continuation block for code after the if.
1862  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1863}
1864
/// Emits a call to the outlined parallel region \p OutlinedFn. Without an if
/// clause (or when it is true) this is __kmpc_fork_call; when the if clause
/// is false the region is run serialized on the current thread between
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// NOTE(review): NumThreads is unused in this body — presumably handled by
/// the caller or another overload; confirm before relying on it here.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function and captured vars to the
  // runtime, which forks the team and invokes it.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: call the outlined function directly on this thread,
  // bracketed by the serialized-parallel runtime calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if clause: unconditionally take the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
1936
1937// If we're inside an (outlined) parallel region, use the region info's
1938// thread-ID variable (it is passed in a first argument of the outlined function
1939// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1940// regular serial code region, get thread ID by calling kmp_int32
1941// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1942// return the address of that temp.
1943Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1944                                             SourceLocation Loc) {
1945  if (auto *OMPRegionInfo =
1946          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1947    if (OMPRegionInfo->getThreadIDVariable())
1948      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
1949
1950  llvm::Value *ThreadID = getThreadID(CGF, Loc);
1951  QualType Int32Ty =
1952      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1953  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1954  CGF.EmitStoreOfScalar(ThreadID,
1955                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1956
1957  return ThreadIDTemp;
1958}
1959
1960llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1961  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1962  std::string Name = getName({Prefix, "var"});
1963  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1964}
1965
1966namespace {
1967/// Common pre(post)-action for different OpenMP constructs.
1968class CommonActionTy final : public PrePostActionTy {
1969  llvm::FunctionCallee EnterCallee;
1970  ArrayRef<llvm::Value *> EnterArgs;
1971  llvm::FunctionCallee ExitCallee;
1972  ArrayRef<llvm::Value *> ExitArgs;
1973  bool Conditional;
1974  llvm::BasicBlock *ContBlock = nullptr;
1975
1976public:
1977  CommonActionTy(llvm::FunctionCallee EnterCallee,
1978                 ArrayRef<llvm::Value *> EnterArgs,
1979                 llvm::FunctionCallee ExitCallee,
1980                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1981      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1982        ExitArgs(ExitArgs), Conditional(Conditional) {}
1983  void Enter(CodeGenFunction &CGF) override {
1984    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1985    if (Conditional) {
1986      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1987      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1988      ContBlock = CGF.createBasicBlock("omp_if.end");
1989      // Generate the branch (If-stmt)
1990      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1991      CGF.EmitBlock(ThenBlock);
1992    }
1993  }
1994  void Done(CodeGenFunction &CGF) {
1995    // Emit the rest of blocks/branches
1996    CGF.EmitBranch(ContBlock);
1997    CGF.EmitBlock(ContBlock, true);
1998  }
1999  void Exit(CodeGenFunction &CGF) override {
2000    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2001  }
2002};
2003} // anonymous namespace
2004
2005void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2006                                         StringRef CriticalName,
2007                                         const RegionCodeGenTy &CriticalOpGen,
2008                                         SourceLocation Loc, const Expr *Hint) {
2009  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2010  // CriticalOpGen();
2011  // __kmpc_end_critical(ident_t *, gtid, Lock);
2012  // Prepare arguments and build a call to __kmpc_critical
2013  if (!CGF.HaveInsertPoint())
2014    return;
2015  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2016                         getCriticalRegionLock(CriticalName)};
2017  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2018                                                std::end(Args));
2019  if (Hint) {
2020    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2021        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2022  }
2023  CommonActionTy Action(
2024      OMPBuilder.getOrCreateRuntimeFunction(
2025          CGM.getModule(),
2026          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2027      EnterArgs,
2028      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2029                                            OMPRTL___kmpc_end_critical),
2030      Args);
2031  CriticalOpGen.setAction(Action);
2032  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2033}
2034
2035void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2036                                       const RegionCodeGenTy &MasterOpGen,
2037                                       SourceLocation Loc) {
2038  if (!CGF.HaveInsertPoint())
2039    return;
2040  // if(__kmpc_master(ident_t *, gtid)) {
2041  //   MasterOpGen();
2042  //   __kmpc_end_master(ident_t *, gtid);
2043  // }
2044  // Prepare arguments and build a call to __kmpc_master
2045  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2046  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2047                            CGM.getModule(), OMPRTL___kmpc_master),
2048                        Args,
2049                        OMPBuilder.getOrCreateRuntimeFunction(
2050                            CGM.getModule(), OMPRTL___kmpc_end_master),
2051                        Args,
2052                        /*Conditional=*/true);
2053  MasterOpGen.setAction(Action);
2054  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2055  Action.Done(CGF);
2056}
2057
2058void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2059                                       const RegionCodeGenTy &MaskedOpGen,
2060                                       SourceLocation Loc, const Expr *Filter) {
2061  if (!CGF.HaveInsertPoint())
2062    return;
2063  // if(__kmpc_masked(ident_t *, gtid, filter)) {
2064  //   MaskedOpGen();
2065  //   __kmpc_end_masked(iden_t *, gtid);
2066  // }
2067  // Prepare arguments and build a call to __kmpc_masked
2068  llvm::Value *FilterVal = Filter
2069                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2070                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2071  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2072                         FilterVal};
2073  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2074                            getThreadID(CGF, Loc)};
2075  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2076                            CGM.getModule(), OMPRTL___kmpc_masked),
2077                        Args,
2078                        OMPBuilder.getOrCreateRuntimeFunction(
2079                            CGM.getModule(), OMPRTL___kmpc_end_masked),
2080                        ArgsEnd,
2081                        /*Conditional=*/true);
2082  MaskedOpGen.setAction(Action);
2083  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2084  Action.Done(CGF);
2085}
2086
2087void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2088                                        SourceLocation Loc) {
2089  if (!CGF.HaveInsertPoint())
2090    return;
2091  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2092    OMPBuilder.createTaskyield(CGF.Builder);
2093  } else {
2094    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2095    llvm::Value *Args[] = {
2096        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2097        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2098    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2099                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2100                        Args);
2101  }
2102
2103  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2104    Region->emitUntiedSwitch(CGF);
2105}
2106
2107void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2108                                          const RegionCodeGenTy &TaskgroupOpGen,
2109                                          SourceLocation Loc) {
2110  if (!CGF.HaveInsertPoint())
2111    return;
2112  // __kmpc_taskgroup(ident_t *, gtid);
2113  // TaskgroupOpGen();
2114  // __kmpc_end_taskgroup(ident_t *, gtid);
2115  // Prepare arguments and build a call to __kmpc_taskgroup
2116  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2117  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2118                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
2119                        Args,
2120                        OMPBuilder.getOrCreateRuntimeFunction(
2121                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2122                        Args);
2123  TaskgroupOpGen.setAction(Action);
2124  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2125}
2126
2127/// Given an array of pointers to variables, project the address of a
2128/// given variable.
2129static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2130                                      unsigned Index, const VarDecl *Var) {
2131  // Pull out the pointer to the variable.
2132  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2133  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2134
2135  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2136  return Address(
2137      CGF.Builder.CreateBitCast(
2138          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2139      ElemTy, CGF.getContext().getDeclAlign(Var));
2140}
2141
/// Emit the outlined helper used by __kmpc_copyprivate to broadcast the
/// values of 'copyprivate' variables:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments point to a void*[n] array (type \p ArgsElemType) with one
/// slot per copyprivate variable; slot I of LHS is assigned from slot I of
/// RHS using AssignmentOps[I].
/// \param CopyprivateVars The copyprivate variables (used for their types).
/// \param DestExprs  Per-variable destination pseudo-declarations.
/// \param SrcExprs   Per-variable source pseudo-declarations.
/// \param AssignmentOps Per-variable copy/assignment expressions.
/// \returns the emitted internal-linkage function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal-linkage helper named through the OpenMP runtime's name mangler.
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret both void* parameters as pointers to the void*[n] array type.
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // Emit one copy per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    // Perform the copy with the semantics captured in AssignmentOps[I].
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2197
/// Emit code for '#pragma omp single':
///   int32 did_it = 0;                   // only with a copyprivate clause
///   if (__kmpc_single(loc, gtid)) {
///     SingleOpGen();
///     did_it = 1;
///     __kmpc_end_single(loc, gtid);
///   }
///   __kmpc_copyprivate(loc, gtid, <buf_size>, <copyprivate list>,
///                      <copy_func>, did_it);  // only with copyprivate
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' records whether this thread executed the single region; it is
  // only needed when copyprivate values must be broadcast afterwards.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body runs only when __kmpc_single returns
  // nonzero; __kmpc_end_single is emitted by Action.Done below.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the conditional region, before end_single)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE: SrcExprs/DstExprs map onto the helper's (DestExprs, SrcExprs)
    // parameter positions; verify against the declarations before reordering.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2284
2285void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2286                                        const RegionCodeGenTy &OrderedOpGen,
2287                                        SourceLocation Loc, bool IsThreads) {
2288  if (!CGF.HaveInsertPoint())
2289    return;
2290  // __kmpc_ordered(ident_t *, gtid);
2291  // OrderedOpGen();
2292  // __kmpc_end_ordered(ident_t *, gtid);
2293  // Prepare arguments and build a call to __kmpc_ordered
2294  if (IsThreads) {
2295    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2296    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2297                              CGM.getModule(), OMPRTL___kmpc_ordered),
2298                          Args,
2299                          OMPBuilder.getOrCreateRuntimeFunction(
2300                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2301                          Args);
2302    OrderedOpGen.setAction(Action);
2303    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2304    return;
2305  }
2306  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307}
2308
2309unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2310  unsigned Flags;
2311  if (Kind == OMPD_for)
2312    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2313  else if (Kind == OMPD_sections)
2314    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2315  else if (Kind == OMPD_single)
2316    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2317  else if (Kind == OMPD_barrier)
2318    Flags = OMP_IDENT_BARRIER_EXPL;
2319  else
2320    Flags = OMP_IDENT_BARRIER_IMPL;
2321  return Flags;
2322}
2323
2324void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2325    CodeGenFunction &CGF, const OMPLoopDirective &S,
2326    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2327  // Check if the loop directive is actually a doacross loop directive. In this
2328  // case choose static, 1 schedule.
2329  if (llvm::any_of(
2330          S.getClausesOfKind<OMPOrderedClause>(),
2331          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2332    ScheduleKind = OMPC_SCHEDULE_static;
2333    // Chunk size is 1 in this case.
2334    llvm::APInt ChunkSize(32, 1);
2335    ChunkExpr = IntegerLiteral::Create(
2336        CGF.getContext(), ChunkSize,
2337        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2338        SourceLocation());
2339  }
2340}
2341
/// Emit an (implicit or explicit) barrier.
/// \param Kind Directive the barrier belongs to; selects the ident_t flags.
/// \param EmitChecks Whether to emit the cancellation-exit check after a
/// cancellable barrier.
/// \param ForceSimpleCall If true, always emit a plain __kmpc_barrier even
/// inside a region with cancellation.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id), tagging the location with the construct's barrier flags.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // In a cancellable region the barrier must be the cancellation-aware
    // variant so a cancelled team can leave the construct.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2391
2392void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2393                                    Expr *ME, bool IsFatal) {
2394  llvm::Value *MVL =
2395      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2396         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2397  // Build call void __kmpc_error(ident_t *loc, int severity, const char
2398  // *message)
2399  llvm::Value *Args[] = {
2400      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2401      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2402      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2403  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2404                          CGM.getModule(), OMPRTL___kmpc_error),
2405                      Args);
2406}
2407
2408/// Map the OpenMP loop schedule to the runtime enumeration.
2409static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2410                                          bool Chunked, bool Ordered) {
2411  switch (ScheduleKind) {
2412  case OMPC_SCHEDULE_static:
2413    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2414                   : (Ordered ? OMP_ord_static : OMP_sch_static);
2415  case OMPC_SCHEDULE_dynamic:
2416    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2417  case OMPC_SCHEDULE_guided:
2418    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2419  case OMPC_SCHEDULE_runtime:
2420    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2421  case OMPC_SCHEDULE_auto:
2422    return Ordered ? OMP_ord_auto : OMP_sch_auto;
2423  case OMPC_SCHEDULE_unknown:
2424    assert(!Chunked && "chunk was specified but schedule kind not known");
2425    return Ordered ? OMP_ord_static : OMP_sch_static;
2426  }
2427  llvm_unreachable("Unexpected runtime schedule");
2428}
2429
2430/// Map the OpenMP distribute schedule to the runtime enumeration.
2431static OpenMPSchedType
2432getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2433  // only static is allowed for dist_schedule
2434  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2435}
2436
2437bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2438                                         bool Chunked) const {
2439  OpenMPSchedType Schedule =
2440      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2441  return Schedule == OMP_sch_static;
2442}
2443
2444bool CGOpenMPRuntime::isStaticNonchunked(
2445    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2446  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2447  return Schedule == OMP_dist_sch_static;
2448}
2449
2450bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2451                                      bool Chunked) const {
2452  OpenMPSchedType Schedule =
2453      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2454  return Schedule == OMP_sch_static_chunked;
2455}
2456
2457bool CGOpenMPRuntime::isStaticChunked(
2458    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2459  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2460  return Schedule == OMP_dist_sch_static_chunked;
2461}
2462
2463bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2464  OpenMPSchedType Schedule =
2465      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2466  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2467  return Schedule != OMP_sch_static;
2468}
2469
2470static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2471                                  OpenMPScheduleClauseModifier M1,
2472                                  OpenMPScheduleClauseModifier M2) {
2473  int Modifier = 0;
2474  switch (M1) {
2475  case OMPC_SCHEDULE_MODIFIER_monotonic:
2476    Modifier = OMP_sch_modifier_monotonic;
2477    break;
2478  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2479    Modifier = OMP_sch_modifier_nonmonotonic;
2480    break;
2481  case OMPC_SCHEDULE_MODIFIER_simd:
2482    if (Schedule == OMP_sch_static_chunked)
2483      Schedule = OMP_sch_static_balanced_chunked;
2484    break;
2485  case OMPC_SCHEDULE_MODIFIER_last:
2486  case OMPC_SCHEDULE_MODIFIER_unknown:
2487    break;
2488  }
2489  switch (M2) {
2490  case OMPC_SCHEDULE_MODIFIER_monotonic:
2491    Modifier = OMP_sch_modifier_monotonic;
2492    break;
2493  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2494    Modifier = OMP_sch_modifier_nonmonotonic;
2495    break;
2496  case OMPC_SCHEDULE_MODIFIER_simd:
2497    if (Schedule == OMP_sch_static_chunked)
2498      Schedule = OMP_sch_static_balanced_chunked;
2499    break;
2500  case OMPC_SCHEDULE_MODIFIER_last:
2501  case OMPC_SCHEDULE_MODIFIER_unknown:
2502    break;
2503  }
2504  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2505  // If the static schedule kind is specified or if the ordered clause is
2506  // specified, and if the nonmonotonic modifier is not specified, the effect is
2507  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2508  // modifier is specified, the effect is as if the nonmonotonic modifier is
2509  // specified.
2510  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2511    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2512          Schedule == OMP_sch_static_balanced_chunked ||
2513          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2514          Schedule == OMP_dist_sch_static_chunked ||
2515          Schedule == OMP_dist_sch_static))
2516      Modifier = OMP_sch_modifier_nonmonotonic;
2517  }
2518  return Schedule | Modifier;
2519}
2520
/// Emit the __kmpc_dispatch_init call that starts a dynamically scheduled
/// worksharing loop.
/// \param IVSize Bit width of the iteration variable (selects the
/// kmp_int[32|64] runtime variant).
/// \param IVSigned Whether the iteration variable is signed.
/// \param Ordered true iff the loop has an 'ordered' clause.
/// \param DispatchValues Lower/upper bounds and the optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static schedules are handled via emitForStaticInit; dispatch init only
  // accepts them when the loop is ordered.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2554
/// Emit the actual call to the (already selected) static-init runtime entry
/// point. Shared by emitForStaticInit and emitDistributeStaticInit.
/// \param ForStaticInitFunction The __kmpc_for_static_init* variant to call.
/// \param Schedule Must be one of the static schedule kinds (asserted below).
/// \param M1, M2 Schedule clause modifiers folded into the schedule argument.
/// \param Values Bounds/stride/last-iteration addresses and the optional
/// chunk; Values.Ordered must be false.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A chunk may only be absent for the non-chunked schedule kinds.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2603
2604void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2605                                        SourceLocation Loc,
2606                                        OpenMPDirectiveKind DKind,
2607                                        const OpenMPScheduleTy &ScheduleKind,
2608                                        const StaticRTInput &Values) {
2609  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2610      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2611  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2612         "Expected loop-based or sections-based directive.");
2613  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2614                                             isOpenMPLoopDirective(DKind)
2615                                                 ? OMP_IDENT_WORK_LOOP
2616                                                 : OMP_IDENT_WORK_SECTIONS);
2617  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2618  llvm::FunctionCallee StaticInitFunction =
2619      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2620                                             false);
2621  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2622  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2623                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2624}
2625
2626void CGOpenMPRuntime::emitDistributeStaticInit(
2627    CodeGenFunction &CGF, SourceLocation Loc,
2628    OpenMPDistScheduleClauseKind SchedKind,
2629    const CGOpenMPRuntime::StaticRTInput &Values) {
2630  OpenMPSchedType ScheduleNum =
2631      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2632  llvm::Value *UpdatedLocation =
2633      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2634  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2635  llvm::FunctionCallee StaticInitFunction;
2636  bool isGPUDistribute =
2637      CGM.getLangOpts().OpenMPIsTargetDevice &&
2638      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2639  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2640      Values.IVSize, Values.IVSigned, isGPUDistribute);
2641
2642  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2643                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2644                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
2645}
2646
2647void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2648                                          SourceLocation Loc,
2649                                          OpenMPDirectiveKind DKind) {
2650  if (!CGF.HaveInsertPoint())
2651    return;
2652  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2653  llvm::Value *Args[] = {
2654      emitUpdateLocation(CGF, Loc,
2655                         isOpenMPDistributeDirective(DKind)
2656                             ? OMP_IDENT_WORK_DISTRIBUTE
2657                             : isOpenMPLoopDirective(DKind)
2658                                   ? OMP_IDENT_WORK_LOOP
2659                                   : OMP_IDENT_WORK_SECTIONS),
2660      getThreadID(CGF, Loc)};
2661  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2662  if (isOpenMPDistributeDirective(DKind) &&
2663      CGM.getLangOpts().OpenMPIsTargetDevice &&
2664      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2665    CGF.EmitRuntimeCall(
2666        OMPBuilder.getOrCreateRuntimeFunction(
2667            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2668        Args);
2669  else
2670    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2671                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2672                        Args);
2673}
2674
2675void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2676                                                 SourceLocation Loc,
2677                                                 unsigned IVSize,
2678                                                 bool IVSigned) {
2679  if (!CGF.HaveInsertPoint())
2680    return;
2681  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2682  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2683  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2684                      Args);
2685}
2686
2687llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2688                                          SourceLocation Loc, unsigned IVSize,
2689                                          bool IVSigned, Address IL,
2690                                          Address LB, Address UB,
2691                                          Address ST) {
2692  // Call __kmpc_dispatch_next(
2693  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2694  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2695  //          kmp_int[32|64] *p_stride);
2696  llvm::Value *Args[] = {
2697      emitUpdateLocation(CGF, Loc),
2698      getThreadID(CGF, Loc),
2699      IL.getPointer(), // &isLastIter
2700      LB.getPointer(), // &Lower
2701      UB.getPointer(), // &Upper
2702      ST.getPointer()  // &Stride
2703  };
2704  llvm::Value *Call = CGF.EmitRuntimeCall(
2705      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2706  return CGF.EmitScalarConversion(
2707      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2708      CGF.getContext().BoolTy, Loc);
2709}
2710
2711void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2712                                           llvm::Value *NumThreads,
2713                                           SourceLocation Loc) {
2714  if (!CGF.HaveInsertPoint())
2715    return;
2716  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2717  llvm::Value *Args[] = {
2718      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2719      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2720  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2721                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2722                      Args);
2723}
2724
2725void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2726                                         ProcBindKind ProcBind,
2727                                         SourceLocation Loc) {
2728  if (!CGF.HaveInsertPoint())
2729    return;
2730  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2731  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2732  llvm::Value *Args[] = {
2733      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2735  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2737                      Args);
2738}
2739
2740void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2741                                SourceLocation Loc, llvm::AtomicOrdering AO) {
2742  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2743    OMPBuilder.createFlush(CGF.Builder);
2744  } else {
2745    if (!CGF.HaveInsertPoint())
2746      return;
2747    // Build call void __kmpc_flush(ident_t *loc)
2748    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2749                            CGM.getModule(), OMPRTL___kmpc_flush),
2750                        emitUpdateLocation(CGF, Loc));
2751  }
2752}
2753
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): these indexes appear to address fields of a kmp_task_t
/// record layout built elsewhere in this file — confirm and keep the order
/// in sync with that layout when changing either.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2779
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Diagnostic callback invoked by the OpenMPIRBuilder when it finds a
  // malformed offload entry while emitting the metadata.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Recover a source location from the entry's (DeviceID, FileID, Line)
      // triple by scanning the files known to the SourceManager for one whose
      // unique ID matches.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      // No source location can be recovered for link errors; report without.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
2829
2830void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2831  if (!KmpRoutineEntryPtrTy) {
2832    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2833    ASTContext &C = CGM.getContext();
2834    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2835    FunctionProtoType::ExtProtoInfo EPI;
2836    KmpRoutineEntryPtrQTy = C.getPointerType(
2837        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2838    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2839  }
2840}
2841
namespace {
/// Bundles the AST nodes describing one private variable in a task-based
/// directive.
struct PrivateHelpersTy {
  /// Form used for private/firstprivate/lastprivate clause items.
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Form used for local variables privatized without a clause; only
  /// \c Original is set (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression to the original variable (null for local privates).
  const Expr *OriginalRef = nullptr;
  // The original variable being privatized.
  const VarDecl *Original = nullptr;
  // Declaration of the private copy.
  const VarDecl *PrivateCopy = nullptr;
  // Element declaration used when initializing the copy from the original.
  const VarDecl *PrivateElemInit = nullptr;
  /// True iff this entry was built with the VarDecl-only constructor.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment paired with the private-variable descriptor.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
2859
2860static bool isAllocatableDecl(const VarDecl *VD) {
2861  const VarDecl *CVD = VD->getCanonicalDecl();
2862  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2863    return false;
2864  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2865  // Use the default allocation.
2866  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2867           !AA->getAllocator());
2868}
2869
2870static RecordDecl *
2871createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2872  if (!Privates.empty()) {
2873    ASTContext &C = CGM.getContext();
2874    // Build struct .kmp_privates_t. {
2875    //         /*  private vars  */
2876    //       };
2877    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2878    RD->startDefinition();
2879    for (const auto &Pair : Privates) {
2880      const VarDecl *VD = Pair.second.Original;
2881      QualType Type = VD->getType().getNonReferenceType();
2882      // If the private variable is a local variable with lvalue ref type,
2883      // allocate the pointer instead of the pointee type.
2884      if (Pair.second.isLocalPrivate()) {
2885        if (VD->getType()->isLValueReferenceType())
2886          Type = C.getPointerType(Type);
2887        if (isAllocatableDecl(VD))
2888          Type = C.getPointerType(Type);
2889      }
2890      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2891      if (VD->hasAttrs()) {
2892        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2893             E(VD->getAttrs().end());
2894             I != E; ++I)
2895          FD->addAttr(*I);
2896      }
2897    }
2898    RD->completeDefinition();
2899    return RD;
2900  }
2901  return nullptr;
2902}
2903
2904static RecordDecl *
2905createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2906                         QualType KmpInt32Ty,
2907                         QualType KmpRoutineEntryPointerQTy) {
2908  ASTContext &C = CGM.getContext();
2909  // Build struct kmp_task_t {
2910  //         void *              shareds;
2911  //         kmp_routine_entry_t routine;
2912  //         kmp_int32           part_id;
2913  //         kmp_cmplrdata_t data1;
2914  //         kmp_cmplrdata_t data2;
2915  // For taskloops additional fields:
2916  //         kmp_uint64          lb;
2917  //         kmp_uint64          ub;
2918  //         kmp_int64           st;
2919  //         kmp_int32           liter;
2920  //         void *              reductions;
2921  //       };
2922  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2923  UD->startDefinition();
2924  addFieldToRecordDecl(C, UD, KmpInt32Ty);
2925  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2926  UD->completeDefinition();
2927  QualType KmpCmplrdataTy = C.getRecordType(UD);
2928  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2929  RD->startDefinition();
2930  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2931  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2932  addFieldToRecordDecl(C, RD, KmpInt32Ty);
2933  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2934  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2935  if (isOpenMPTaskLoopDirective(Kind)) {
2936    QualType KmpUInt64Ty =
2937        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2938    QualType KmpInt64Ty =
2939        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2940    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2941    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2942    addFieldToRecordDecl(C, RD, KmpInt64Ty);
2943    addFieldToRecordDecl(C, RD, KmpInt32Ty);
2944    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2945  }
2946  RD->completeDefinition();
2947  return RD;
2948}
2949
2950static RecordDecl *
2951createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2952                                     ArrayRef<PrivateDataTy> Privates) {
2953  ASTContext &C = CGM.getContext();
2954  // Build struct kmp_task_t_with_privates {
2955  //         kmp_task_t task_data;
2956  //         .kmp_privates_t. privates;
2957  //       };
2958  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2959  RD->startDefinition();
2960  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2961  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2962    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2963  RD->completeDefinition();
2964  return RD;
2965}
2966
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build signature kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  // Create the internal ".omp_task_entry." function.
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // tt (the task object) and its embedded kmp_task_t (first field).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // &tt->task_data.part_id is passed by address (untied task bookkeeping).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->task_data.shareds and cast it to the expected shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // &tt->privates when the wrapper record has a privates field, else null.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entries.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally receive lb, ub, st, liter, reductions,
    // loaded from the corresponding kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the entry to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3081
/// Emit the task destructor function which, given a task object, pushes a
/// destroy cleanup for every field of the privates record whose type requires
/// destruction:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // destroy each destructible field of tt->privates
/// }
/// \endcode
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Build signature kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Navigate to the privates record: second field of the wrapper.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for each field that needs destruction; the
  // cleanups are emitted when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3130
3131/// Emit a privates mapping function for correct handling of private and
3132/// firstprivate variables.
3133/// \code
3134/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3135/// **noalias priv1,...,  <tyn> **noalias privn) {
3136///   *priv1 = &.privates.priv1;
3137///   ...;
3138///   *privn = &.privates.privn;
3139/// }
3140/// \endcode
3141static llvm::Value *
3142emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3143                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
3144                               ArrayRef<PrivateDataTy> Privates) {
3145  ASTContext &C = CGM.getContext();
3146  FunctionArgList Args;
3147  ImplicitParamDecl TaskPrivatesArg(
3148      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3149      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3150      ImplicitParamKind::Other);
3151  Args.push_back(&TaskPrivatesArg);
3152  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3153  unsigned Counter = 1;
3154  for (const Expr *E : Data.PrivateVars) {
3155    Args.push_back(ImplicitParamDecl::Create(
3156        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3157        C.getPointerType(C.getPointerType(E->getType()))
3158            .withConst()
3159            .withRestrict(),
3160        ImplicitParamKind::Other));
3161    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3162    PrivateVarsPos[VD] = Counter;
3163    ++Counter;
3164  }
3165  for (const Expr *E : Data.FirstprivateVars) {
3166    Args.push_back(ImplicitParamDecl::Create(
3167        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3168        C.getPointerType(C.getPointerType(E->getType()))
3169            .withConst()
3170            .withRestrict(),
3171        ImplicitParamKind::Other));
3172    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3173    PrivateVarsPos[VD] = Counter;
3174    ++Counter;
3175  }
3176  for (const Expr *E : Data.LastprivateVars) {
3177    Args.push_back(ImplicitParamDecl::Create(
3178        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3179        C.getPointerType(C.getPointerType(E->getType()))
3180            .withConst()
3181            .withRestrict(),
3182        ImplicitParamKind::Other));
3183    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3184    PrivateVarsPos[VD] = Counter;
3185    ++Counter;
3186  }
3187  for (const VarDecl *VD : Data.PrivateLocals) {
3188    QualType Ty = VD->getType().getNonReferenceType();
3189    if (VD->getType()->isLValueReferenceType())
3190      Ty = C.getPointerType(Ty);
3191    if (isAllocatableDecl(VD))
3192      Ty = C.getPointerType(Ty);
3193    Args.push_back(ImplicitParamDecl::Create(
3194        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3195        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3196        ImplicitParamKind::Other));
3197    PrivateVarsPos[VD] = Counter;
3198    ++Counter;
3199  }
3200  const auto &TaskPrivatesMapFnInfo =
3201      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3202  llvm::FunctionType *TaskPrivatesMapTy =
3203      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3204  std::string Name =
3205      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3206  auto *TaskPrivatesMap = llvm::Function::Create(
3207      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3208      &CGM.getModule());
3209  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3210                                    TaskPrivatesMapFnInfo);
3211  if (CGM.getLangOpts().Optimize) {
3212    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3213    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3214    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3215  }
3216  CodeGenFunction CGF(CGM);
3217  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3218                    TaskPrivatesMapFnInfo, Args, Loc, Loc);
3219
3220  // *privi = &.privates.privi;
3221  LValue Base = CGF.EmitLoadOfPointerLValue(
3222      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3223      TaskPrivatesArg.getType()->castAs<PointerType>());
3224  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3225  Counter = 0;
3226  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3227    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3228    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3229    LValue RefLVal =
3230        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3231    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3232        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3233    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3234    ++Counter;
3235  }
3236  CGF.FinishFunction();
3237  return TaskPrivatesMap;
3238}
3239
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds block of the source task;
///        may be Address::invalid() when no copy from shareds is needed.
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup true when emitting inside the task duplication routine
///        (taskloop), false for initial task setup.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase views the shareds block as the captured-record type so fields
    // can be addressed by FieldDecl below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function, only non-trivial constructor initializers need to
    // be re-run; trivially-initializable copies are handled elsewhere.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value from the shareds block of the source task,
          // re-aligned to the original declaration's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the element and run the
          // initializer expression into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element initializer: plain private — just run the initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3359
3360/// Check if duplication function is required for taskloops.
3361static bool checkInitIsRequired(CodeGenFunction &CGF,
3362                                ArrayRef<PrivateDataTy> Privates) {
3363  bool InitRequired = false;
3364  for (const PrivateDataTy &Pair : Privates) {
3365    if (Pair.second.isLocalPrivate())
3366      continue;
3367    const VarDecl *VD = Pair.second.PrivateCopy;
3368    const Expr *Init = VD->getAnyInitializer();
3369    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3370                                    !CGF.isTrivialInitializer(Init));
3371    if (InitRequired)
3372      break;
3373  }
3374  return InitRequired;
3375}
3376
3377
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build signature void (kmp_task_t *dst, kmp_task_t *src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Destination task object.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so firstprivates can be
    // copied from it. NOTE: this inner TDBase intentionally shadows the outer
    // destination TDBase; it refers to SrcArg, not DstArg.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialize the destination task's privates (dup mode).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3456
3457/// Checks if destructor function is required to be generated.
3458/// \return true if cleanups are required, false otherwise.
3459static bool
3460checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3461                         ArrayRef<PrivateDataTy> Privates) {
3462  for (const PrivateDataTy &P : Privates) {
3463    if (P.second.isLocalPrivate())
3464      continue;
3465    QualType Ty = P.second.Original->getType().getNonReferenceType();
3466    if (Ty.isDestructedType())
3467      return true;
3468  }
3469  return false;
3470}
3471
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes the iterator variables and emits
/// the loop preheaders/headers for every iterator in \p E; the destructor
/// emits the matching counter increments, back-branches and exit blocks in
/// reverse order. Any code emitted while the scope is alive therefore lands
/// inside the innermost generated loop body. A null \p E makes the whole
/// scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator "continue" destinations (the loop-header blocks).
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  // Per-iterator loop-exit destinations.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Emit all upper bounds before Privatize() so they are evaluated with
      // the original (non-privatized) variables.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Emit one nested loop header per iterator; the bodies stay open until
    // the destructor closes them.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The comparison must match the counter's signedness.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first to mirror the nesting created in the
    // constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3547
3548static std::pair<llvm::Value *, llvm::Value *>
3549getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3550  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3551  llvm::Value *Addr;
3552  if (OASE) {
3553    const Expr *Base = OASE->getBase();
3554    Addr = CGF.EmitScalarExpr(Base);
3555  } else {
3556    Addr = CGF.EmitLValue(E).getPointer(CGF);
3557  }
3558  llvm::Value *SizeVal;
3559  QualType Ty = E->getType();
3560  if (OASE) {
3561    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3562    for (const Expr *SE : OASE->getDimensions()) {
3563      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3564      Sz = CGF.EmitScalarConversion(
3565          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3566      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3567    }
3568  } else if (const auto *ASE =
3569                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3570    LValue UpAddrLVal =
3571        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3572    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3573    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3574        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3575    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3576    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3577    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3578  } else {
3579    SizeVal = CGF.getTypeSize(Ty);
3580  }
3581  return std::make_pair(Addr, SizeVal);
3582}
3583
3584/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3585static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3586  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3587  if (KmpTaskAffinityInfoTy.isNull()) {
3588    RecordDecl *KmpAffinityInfoRD =
3589        C.buildImplicitRecord("kmp_task_affinity_info_t");
3590    KmpAffinityInfoRD->startDefinition();
3591    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3592    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3593    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3594    KmpAffinityInfoRD->completeDefinition();
3595    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3596  }
3597}
3598
/// Emits task allocation/initialization for the task-generating directive
/// \p D:
///  - builds the kmp_task_t-with-privates record type for this task,
///  - allocates it via __kmpc_omp_task_alloc (or
///    __kmpc_omp_target_task_alloc for 'nowait' target tasks),
///  - handles detach/affinity clauses, copies shareds, initializes private
///    copies, and records destructor/priority data in the task object.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression used to copy the
  // original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment; stable_sort keeps declaration order among
  // privates of equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separately cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task entry.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may carry a runtime condition: select the flag
  // dynamically in that case, otherwise fold it statically.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    // 'nowait' tasks are allocated with the target-task variant, which takes
    // the device ID as an extra trailing argument.
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements stays null unless some clause has an iterator modifier,
    // in which case the element count is only known at runtime.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: build a VLA of affinity entries.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized case: a plain constant array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, continuing
    // from the statically filled prefix.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop directives additionally need a task-dup function when
    // lastprivates are present or private init is required.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3986
3987/// Translates internal dependency kind into the runtime kind.
3988static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3989  RTLDependenceKindTy DepKind;
3990  switch (K) {
3991  case OMPC_DEPEND_in:
3992    DepKind = RTLDependenceKindTy::DepIn;
3993    break;
3994  // Out and InOut dependencies must use the same code.
3995  case OMPC_DEPEND_out:
3996  case OMPC_DEPEND_inout:
3997    DepKind = RTLDependenceKindTy::DepInOut;
3998    break;
3999  case OMPC_DEPEND_mutexinoutset:
4000    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4001    break;
4002  case OMPC_DEPEND_inoutset:
4003    DepKind = RTLDependenceKindTy::DepInOutSet;
4004    break;
4005  case OMPC_DEPEND_outallmemory:
4006    DepKind = RTLDependenceKindTy::DepOmpAllMem;
4007    break;
4008  case OMPC_DEPEND_source:
4009  case OMPC_DEPEND_sink:
4010  case OMPC_DEPEND_depobj:
4011  case OMPC_DEPEND_inoutallmemory:
4012  case OMPC_DEPEND_unknown:
4013    llvm_unreachable("Unknown task dependence type");
4014  }
4015  return DepKind;
4016}
4017
4018/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4019static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4020                           QualType &FlagsTy) {
4021  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4022  if (KmpDependInfoTy.isNull()) {
4023    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4024    KmpDependInfoRD->startDefinition();
4025    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4026    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4027    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4028    KmpDependInfoRD->completeDefinition();
4029    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4030  }
4031}
4032
/// Returns the number of dependence entries stored in a depobj together with
/// an lvalue for the first kmp_depend_info entry. The count is read from the
/// base_addr field of the sentinel entry at deps[-1].
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  // Load the pointer to the kmp_depend_info array held by the depobj.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to reach the sentinel entry that precedes the
  // actual dependence list.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4059
/// Emits (base_addr, len, flags) kmp_depend_info entries into
/// \p DependenciesArray for every dependence expression in \p Data.
/// \p Pos is either a compile-time slot counter (unsigned *) or a runtime
/// position lvalue (LValue *); in both cases it is advanced past each
/// emitted entry.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the depend clause has an iterator modifier, wrap the stores in the
  // generated iterator loops; the scope is a no-op for a null expression.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      // 'omp_all_memory' is encoded as a null address with zero length.
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Compile-time slot index.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime slot index loaded from the position lvalue.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the slot position: bump the counter statically or emit an
    // increment of the runtime index.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4130
/// Computes, for each depobj expression in \p Data, the number of
/// kmp_depend_info records stored in that depobj. Returns one size value
/// (uintptr-width) per dependency expression.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // If the depend clause uses iterators, this scope emits the iterator
    // loop(s); the per-expression code below is then generated inside them.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      // Read the element count recorded inside the depobj object itself.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Store the count into a stack temporary; the temporary is loaded back
      // after the iterator scope has been closed (see loop below).
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the final sizes outside of the (possible) iterator loops.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4168
/// Copies the kmp_depend_info records of every depobj in \p Data into
/// \p DependenciesArray, starting at the running index held in \p PosLVal.
/// \p PosLVal is advanced by the number of records copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  // Size of a single kmp_depend_info record, used to scale the memcpy.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit iterator loop(s) if the depend clause uses iterators; each depobj
    // is then copied once per iteration.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      // Base points at the depobj's record array; NumDeps is its length.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4205
/// Emits the kmp_depend_info array for a set of depend clauses and returns
/// {number of elements (i32), pointer to the array}. Returns
/// {nullptr, invalid} when there are no dependencies at all. The array is
/// filled in three passes: plain deps, iterator-based deps, then depobj
/// copies.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: plain deps only (depobj and iterator-based deps are
  // counted at runtime below).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Sum the runtime-known element counts stored inside each depobj.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Each dep expression is replicated once per point of the iteration
      // space: multiply the upper bounds together with the expression count.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: build a VLA-typed local.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Static count: use a plain constant-sized array temporary.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies, tracked with a compile-time position.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Pass 2 (and 3) need a runtime position counter, seeded with Pos.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4330
/// Emits the dependency array for an 'omp depobj' construct. The array is
/// heap-allocated via __kmpc_alloc with one extra leading element that stores
/// the number of records; the returned address points just past that size
/// element, at the first real record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Element count is runtime-only: product of the iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading size element, then scale by the record size.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static element count: size the allocation from a constant array type.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Real records start at index 1; with iterators the position must be a
  // runtime counter, otherwise a compile-time index suffices.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer past the size element, cast to void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
4417
/// Emits the 'destroy' clause of an 'omp depobj' construct: releases the
/// depobj's dependency array via __kmpc_free. The stored pointer references
/// the first record, so the code steps back one element to reach the actual
/// allocation base (which holds the size element).
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // Load the void* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  // GEP -1: move from the first record back to the allocation start.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, allocator /*nullptr = default*/);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
4444
/// Emits the 'update' clause of an 'omp depobj' construct: iterates over all
/// records in the depobj's dependency array and rewrites each record's flags
/// field to \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Base is the first record; NumDeps is the runtime element count.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer; second incoming edge is added at
  // the bottom of the loop once the advanced pointer is known.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4493
/// Emits code for an 'omp task' directive: allocates/initializes the task via
/// emitTaskInit, builds the dependency array (if any), and schedules the task
/// through __kmpc_omp_task[_with_deps]. When an 'if' clause evaluates to
/// false, the task is instead executed immediately inline between
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 (after waiting on
/// dependencies).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' path: actually enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // 'else' path (if-clause false): wait on dependencies, then run the task
  // body inline, bracketed by the if0 begin/complete runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4613
/// Emits code for an 'omp taskloop' directive: initializes the task object,
/// stores the loop bounds/stride and reductions pointer into it, and invokes
/// __kmpc_taskloop with the if/grainsize/num_tasks clause values.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause value (1 when absent).
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // record from the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' parameter of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
4699
4700/// Emit reduction operation for each element of array (required for
4701/// array sections) LHS op = RHS.
4702/// \param Type Type of array.
4703/// \param LHSVar Variable on the left side of the reduction operation
4704/// (references element of array in original variable).
4705/// \param RHSVar Variable on the right side of the reduction operation
4706/// (references element of array in original variable).
4707/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4708/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (RHS) and destination (LHS) element
  // pointers; back-edges are added at the bottom of the loop.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar to the current element addresses so the
  // reduction-operation generator works on one element at a time.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
4782
4783/// Emit reduction combiner. If the combiner is a simple expression emit it as
4784/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4785/// UDR combiner function.
4786static void emitReductionCombiner(CodeGenFunction &CGF,
4787                                  const Expr *ReductionOp) {
4788  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4789    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4790      if (const auto *DRE =
4791              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4792        if (const auto *DRD =
4793                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4794          std::pair<llvm::Function *, llvm::Function *> Reduction =
4795              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4796          RValue Func = RValue::get(Reduction.first);
4797          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4798          CGF.EmitIgnoredExpr(ReductionOp);
4799          return;
4800        }
4801  CGF.EmitIgnoredExpr(ReductionOp);
4802}
4803
/// Emits the reduction function used by the __kmpc_reduce* runtime entries:
/// \code
/// void <reducer-name>(void *LHSArg, void *RHSArg) {
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// }
/// \endcode
/// Both arguments point at arrays (element type \p ArgsElemType) of pointers
/// to the reduction items; variably-modified items use an extra slot holding
/// the stored size so their VLA type can be re-emitted here.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  // Internal linkage: the function is only referenced from runtime calls
  // emitted in this translation unit.
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable onto the matching slot of the argument
  // arrays; Idx runs ahead of I whenever a VLA size slot is consumed.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the loaded size so that
      // EmitVariablyModifiedType can materialize the type in this function.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for every reduction item; array-typed items are
  // combined element-by-element.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4893
4894void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4895                                                  const Expr *ReductionOp,
4896                                                  const Expr *PrivateRef,
4897                                                  const DeclRefExpr *LHS,
4898                                                  const DeclRefExpr *RHS) {
4899  if (PrivateRef->getType()->isArrayType()) {
4900    // Emit reduction for array section.
4901    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4902    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4903    EmitOMPAggregateReduction(
4904        CGF, PrivateRef->getType(), LHSVar, RHSVar,
4905        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4906          emitReductionCombiner(CGF, ReductionOp);
4907        });
4908  } else {
4909    // Emit reduction for array subscript or single variable.
4910    emitReductionCombiner(CGF, ReductionOp);
4911  }
4912}
4913
/// Emits the end-of-region code for a 'reduction' clause: either a direct
/// serial combination (SimpleReduction) or the full __kmpc_reduce{_nowait}
/// protocol with a switch over the runtime's chosen strategy (case 1:
/// tree/critical-backed combine, case 2: atomic combine).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial combination: apply each reduction op directly with no runtime
    // calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // CommonActionTy wraps the combiners so __kmpc_end_reduce{_nowait} runs on
  // region exit.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path when a hardware atomic cannot be used: stash
                // the loaded value in a temporary remapped to VD and re-emit
                // the update expression against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5217
5218/// Generates unique name for artificial threadprivate variables.
5219/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5220static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5221                                      const Expr *Ref) {
5222  SmallString<256> Buffer;
5223  llvm::raw_svector_ostream Out(Buffer);
5224  const clang::DeclRefExpr *DE;
5225  const VarDecl *D = ::getBaseDecl(Ref, DE);
5226  if (!D)
5227    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5228  D = D->getCanonicalDecl();
5229  std::string Name = CGM.getOpenMPRuntime().getName(
5230      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5231  Out << Prefix << Name << "_"
5232      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5233  return std::string(Out.str());
5234}
5235
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG reduction codegen helper holding the reduction items.
/// \param N index of the reduction item this function initializes.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to initialize.
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5301
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param N index of the reduction item this function combines.
/// \param ReductionOp combiner expression written in terms of \p LHS/\p RHS.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5378
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param N index of the reduction item this function finalizes.
/// \returns the finalizer, or nullptr when the item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is emitted (and none is registered with the runtime) when
  // the private copy has nothing to destroy.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %arg points at the private copy to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5426
5427llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5428    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5429    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5430  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5431    return nullptr;
5432
5433  // Build typedef struct:
5434  // kmp_taskred_input {
5435  //   void *reduce_shar; // shared reduction item
5436  //   void *reduce_orig; // original reduction item used for initialization
5437  //   size_t reduce_size; // size of data item
5438  //   void *reduce_init; // data initialization routine
5439  //   void *reduce_fini; // data finalization routine
5440  //   void *reduce_comb; // data combiner routine
5441  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5442  // } kmp_taskred_input_t;
5443  ASTContext &C = CGM.getContext();
5444  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5445  RD->startDefinition();
5446  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5447  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5449  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5451  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5452  const FieldDecl *FlagsFD = addFieldToRecordDecl(
5453      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5454  RD->completeDefinition();
5455  QualType RDType = C.getRecordType(RD);
5456  unsigned Size = Data.ReductionVars.size();
5457  llvm::APInt ArraySize(/*numBits=*/64, Size);
5458  QualType ArrayRDType =
5459      C.getConstantArrayType(RDType, ArraySize, nullptr,
5460                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5461  // kmp_task_red_input_t .rd_input.[Size];
5462  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5463  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5464                       Data.ReductionCopies, Data.ReductionOps);
5465  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5466    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5467    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5468                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5469    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5470        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5471        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5472        ".rd_input.gep.");
5473    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5474    // ElemLVal.reduce_shar = &Shareds[Cnt];
5475    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5476    RCG.emitSharedOrigLValue(CGF, Cnt);
5477    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5478    CGF.EmitStoreOfScalar(Shared, SharedLVal);
5479    // ElemLVal.reduce_orig = &Origs[Cnt];
5480    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5481    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5482    CGF.EmitStoreOfScalar(Orig, OrigLVal);
5483    RCG.emitAggregateType(CGF, Cnt);
5484    llvm::Value *SizeValInChars;
5485    llvm::Value *SizeVal;
5486    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5487    // We use delayed creation/initialization for VLAs and array sections. It is
5488    // required because runtime does not provide the way to pass the sizes of
5489    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5490    // threadprivate global variables are used to store these values and use
5491    // them in the functions.
5492    bool DelayedCreation = !!SizeVal;
5493    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5494                                               /*isSigned=*/false);
5495    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5496    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5497    // ElemLVal.reduce_init = init;
5498    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5499    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5500    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5501    // ElemLVal.reduce_fini = fini;
5502    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5503    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5504    llvm::Value *FiniAddr =
5505        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5506    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5507    // ElemLVal.reduce_comb = comb;
5508    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5509    llvm::Value *CombAddr = emitReduceCombFunction(
5510        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5511        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5512    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5513    // ElemLVal.flags = 0;
5514    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5515    if (DelayedCreation) {
5516      CGF.EmitStoreOfScalar(
5517          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5518          FlagsLVal);
5519    } else
5520      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5521                                 FlagsLVal.getType());
5522  }
5523  if (Data.IsReductionWithTaskMod) {
5524    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5525    // is_ws, int num, void *data);
5526    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5527    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5528                                                  CGM.IntTy, /*isSigned=*/true);
5529    llvm::Value *Args[] = {
5530        IdentTLoc, GTid,
5531        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5532                               /*isSigned=*/true),
5533        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5534        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5535            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5536    return CGF.EmitRuntimeCall(
5537        OMPBuilder.getOrCreateRuntimeFunction(
5538            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5539        Args);
5540  }
5541  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5542  llvm::Value *Args[] = {
5543      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5544                                /*isSigned=*/true),
5545      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5546      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5547                                                      CGM.VoidPtrTy)};
5548  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5549                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5550                             Args);
5551}
5552
/// Emits the finalization call for a task reduction with a 'task' modifier:
///   void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
///                                            int is_ws);
/// \p IsWorksharingReduction selects the is_ws flag (1 for worksharing, 0 for
/// parallel/other regions) and must match the value used at init time.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  // Thread id is widened/narrowed to a signed 'int' to match the runtime
  // signature.
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  // The runtime call returns void; result intentionally discarded.
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
5570
/// Emits fix-up code for reduction item \p N when its size is not a
/// compile-time constant (VLAs/array sections): the dynamically computed size
/// is stored into an artificial threadprivate variable so the generated
/// initializer/combiner/finalizer functions can read it later (the runtime
/// provides no way to pass these sizes directly).
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate variable only if the size is non-constant, i.e.
  // Sizes.second is a non-null llvm::Value; for constant-sized items
  // Sizes.second is null and no fix-up is needed.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    // The variable's name is derived from the reduction item's reference
    // expression so the same item always maps to the same storage.
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
5587
/// Returns the address of the current thread's private copy of a task
/// reduction item. \p ReductionsPtr is the taskgroup reduction data handle
/// (the 'tg' argument) and \p SharedLVal is the original shared item, passed
/// as the runtime's 'd' argument.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // Wrap the returned i8* in an Address carrying the shared item's alignment;
  // callers cast to the element type as needed.
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
5607
/// Emits code for a '#pragma omp taskwait'. Uses the OpenMPIRBuilder when
/// enabled (and no depend clauses are present); otherwise emits either
/// __kmpc_omp_taskwait_deps_51 (with dependences) or __kmpc_omp_taskwait.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Materialize the depend clause items (if any) into an array the runtime
    // can consume; NumOfElements is the runtime-visible element count.
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      // No noalias dependence list is emitted here: count 0, null pointer.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      // Scope ensures any cleanups for the dependence array run right after
      // the runtime call.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // In an untied-task context, emit the switch that allows resuming the task
  // after this scheduling point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5660
5661void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5662                                           OpenMPDirectiveKind InnerKind,
5663                                           const RegionCodeGenTy &CodeGen,
5664                                           bool HasCancel) {
5665  if (!CGF.HaveInsertPoint())
5666    return;
5667  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5668                                 InnerKind != OMPD_critical &&
5669                                     InnerKind != OMPD_master &&
5670                                     InnerKind != OMPD_masked);
5671  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5672}
5673
namespace {
/// Cancellation kinds passed as the 'kmp_int32 cncl_kind' argument to the
/// __kmpc_cancel/__kmpc_cancellationpoint runtime calls below. The numeric
/// values are presumably fixed by the runtime ABI — do not renumber.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel a parallel region
  CancelLoop = 2,      // cancel a worksharing loop
  CancelSections = 3,  // cancel a sections construct
  CancelTaskgroup = 4  // cancel a taskgroup
};
} // anonymous namespace
5683
5684static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5685  RTCancelKind CancelKind = CancelNoreq;
5686  if (CancelRegion == OMPD_parallel)
5687    CancelKind = CancelParallel;
5688  else if (CancelRegion == OMPD_for)
5689    CancelKind = CancelLoop;
5690  else if (CancelRegion == OMPD_sections)
5691    CancelKind = CancelSections;
5692  else {
5693    assert(CancelRegion == OMPD_taskgroup);
5694    CancelKind = CancelTaskgroup;
5695  }
5696  return CancelKind;
5697}
5698
/// Emits code for '#pragma omp cancellation point': calls
/// __kmpc_cancellationpoint and, when it reports an active cancellation,
/// branches out of the construct (after a cancel barrier for parallel).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
5738
/// Emits code for '#pragma omp cancel': calls __kmpc_cancel (optionally
/// guarded by an 'if' clause condition) and, when cancellation is active,
/// branches out of the construct (after a cancel barrier for parallel).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' branch of the (optional) if clause: perform the actual
    // cancel. Note the lambda takes its own CGF — it may run in a different
    // emission context than the enclosing one.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: cancel only when the condition holds; the else
      // branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
5784
namespace {
/// Cleanup action for uses_allocators support: on region entry, initializes
/// each (allocator, traits) pair via emitUsesAllocatorsInit; on region exit,
/// destroys each allocator via emitUsesAllocatorsFini.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// Pairs of (allocator expression, allocator-traits expression).
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emits __kmpc_init_allocator calls for all allocators on region entry.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  /// Emits __kmpc_destroy_allocator calls for all allocators on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
5812
5813void CGOpenMPRuntime::emitTargetOutlinedFunction(
5814    const OMPExecutableDirective &D, StringRef ParentName,
5815    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5816    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5817  assert(!ParentName.empty() && "Invalid target entry parent name!");
5818  HasEmittedTargetRegion = true;
5819  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5820  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5821    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5822      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5823      if (!D.AllocatorTraits)
5824        continue;
5825      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5826    }
5827  }
5828  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5829  CodeGen.setAction(UsesAllocatorAction);
5830  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5831                                   IsOffloadEntry, CodeGen);
5832}
5833
/// Emits initialization of one uses_allocators allocator: calls
///   __kmpc_init_allocator(gtid, memspace, ntraits, traits)
/// with the default (null) memory space, then stores the returned handle into
/// the allocator variable (emitted here as an automatic variable).
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array size of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.getPointer();

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // The allocator variable is local to the region; allocate it before use.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* handle to the allocator variable's type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
5867
/// Emits destruction of one uses_allocators allocator: loads the allocator
/// handle from its variable, converts it back to void*, and calls
/// __kmpc_destroy_allocator(gtid, allocator).
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  // Mirror of the conversion done at init time, in the opposite direction.
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  // The runtime call returns void; result intentionally discarded.
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
5883
/// Computes combined min/max thread and team counts for the target directive
/// \p D: starts from the bounds implied by the directive's clauses, then
/// tightens them with any CUDA launch-bounds / AMDGPU flat-work-group-size
/// attributes attached via 'ompx_attribute' clauses.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // -1 means "no maximum specified" for the Max values below.
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      // Note: the CUDA handler fills only max-threads and min/max blocks;
      // AttrMinThreadsVal keeps its default of 1 in that case.
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      // Merge rules: take the largest minimum; take the smallest maximum,
      // but only where a maximum is actually specified (> 0).
      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
5919
/// Generates the target-region outlined function via the OpenMPIRBuilder:
/// builds the entry info from the directive's presumed location, supplies a
/// callback that emits the captured statement body, then applies target
/// attributes (and AMDGPU waves-per-EU from ompx_attribute) to the result.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  // A dedicated CodeGenFunction is used for the outlined body; the callback
  // below captures it by reference and is invoked by the IR builder with the
  // final entry-function name.
  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  // The builder may decline to emit a function (e.g. host-only compilation).
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
5953
5954/// Checks if the expression is constant or does not have non-trivial function
5955/// calls.
5956static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5957  // We can skip constant expressions.
5958  // We can skip expressions with trivial calls or simple expressions.
5959  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5960          !E->hasNonTrivialCall(Ctx)) &&
5961         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5962}
5963
/// Strips containers and ignorable statements to find the single meaningful
/// child of \p Body. Returns nullptr when there is more than one meaningful
/// child (or none reachable by this analysis).
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, each time picking the one
  // non-ignorable statement (if exactly one exists).
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions (constant / no non-trivial calls, no side
      // effects) do not count as children.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable when every declared entity is
        // either a non-variable declaration of a benign kind or a variable
        // that is global or never used.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap containers around the surviving child before the next round.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6005
/// Determines the number-of-teams expression (if any) for a target-based
/// directive \p D and fills \p MinTeamsVal / \p MaxTeamsVal with constant
/// bounds when they can be computed. Returns the num_teams clause expression
/// when one governs the directive, otherwise nullptr with the bounds set to:
///   1/1  - exactly one team (parallel/simd-only regions),
///   0/0  - teams region present but count unknown at compile time,
///   -1/-1 - plain 'target' with no teams region nested inside.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look inside for a nested teams/parallel/simd
    // directive to decide the team count.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Nested teams with a num_teams clause: return the expression and
          // fold it to a constant bound when possible.
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: count unknown.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams forms: the num_teams clause (if any) is on the
    // directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // target+parallel/simd combinations always run a single team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  // All remaining kinds are not target-based executable directives; they are
  // rejected by the assert above and fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6137
/// Emits an i32 value with the number of teams for the target directive
/// \p D: evaluates the num_teams clause expression when one exists, otherwise
/// materializes the constant bound computed by
/// getNumTeamsExprForTargetDirective. Host-side only.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // Plain 'target': the clause lives on a nested teams directive, so the
      // expression must be evaluated in the inner captured-statement context.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined forms: evaluate the clause expression directly, with a
      // cleanup scope for any temporaries it creates.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                  /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                             /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // NOTE(review): the assert message says "Num threads" but this function
  // handles team counts — looks like a copy-paste; verify before changing.
  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
6179
6180/// Check for a num threads constant value (stored in \p DefaultVal), or
6181/// expression (stored in \p E). If the value is conditional (via an if-clause),
6182/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6183/// nullptr, no expression evaluation is perfomed.
6184static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6185                          const Expr **E, int32_t &UpperBound,
6186                          bool UpperBoundOnly, llvm::Value **CondVal) {
6187  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6188      CGF.getContext(), CS->getCapturedStmt());
6189  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6190  if (!Dir)
6191    return;
6192
6193  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6194    // Handle if clause. If if clause present, the number of threads is
6195    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6196    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6197      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6198      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6199      const OMPIfClause *IfClause = nullptr;
6200      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6201        if (C->getNameModifier() == OMPD_unknown ||
6202            C->getNameModifier() == OMPD_parallel) {
6203          IfClause = C;
6204          break;
6205        }
6206      }
6207      if (IfClause) {
6208        const Expr *CondExpr = IfClause->getCondition();
6209        bool Result;
6210        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6211          if (!Result) {
6212            UpperBound = 1;
6213            return;
6214          }
6215        } else {
6216          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6217          if (const auto *PreInit =
6218                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6219            for (const auto *I : PreInit->decls()) {
6220              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6221                CGF.EmitVarDecl(cast<VarDecl>(*I));
6222              } else {
6223                CodeGenFunction::AutoVarEmission Emission =
6224                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6225                CGF.EmitAutoVarCleanups(Emission);
6226              }
6227            }
6228            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6229          }
6230        }
6231      }
6232    }
6233    // Check the value of num_threads clause iff if clause was not specified
6234    // or is not evaluated to false.
6235    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6236      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6237      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6238      const auto *NumThreadsClause =
6239          Dir->getSingleClause<OMPNumThreadsClause>();
6240      const Expr *NTExpr = NumThreadsClause->getNumThreads();
6241      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6242        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6243          UpperBound =
6244              UpperBound
6245                  ? Constant->getZExtValue()
6246                  : std::min(UpperBound,
6247                             static_cast<int32_t>(Constant->getZExtValue()));
6248      // If we haven't found a upper bound, remember we saw a thread limiting
6249      // clause.
6250      if (UpperBound == -1)
6251        UpperBound = 0;
6252      if (!E)
6253        return;
6254      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6255      if (const auto *PreInit =
6256              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6257        for (const auto *I : PreInit->decls()) {
6258          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6259            CGF.EmitVarDecl(cast<VarDecl>(*I));
6260          } else {
6261            CodeGenFunction::AutoVarEmission Emission =
6262                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6263            CGF.EmitAutoVarCleanups(Emission);
6264          }
6265        }
6266      }
6267      *E = NTExpr;
6268    }
6269    return;
6270  }
6271  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6272    UpperBound = 1;
6273}
6274
6275const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6276    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6277    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6278  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6279         "Clauses associated with the teams directive expected to be emitted "
6280         "only for the host!");
6281  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6282  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6283         "Expected target-based executable directive.");
6284
6285  const Expr *NT = nullptr;
6286  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6287
6288  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6289    if (E->isIntegerConstantExpr(CGF.getContext())) {
6290      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6291        UpperBound = UpperBound ? Constant->getZExtValue()
6292                                : std::min(UpperBound,
6293                                           int32_t(Constant->getZExtValue()));
6294    }
6295    // If we haven't found a upper bound, remember we saw a thread limiting
6296    // clause.
6297    if (UpperBound == -1)
6298      UpperBound = 0;
6299    if (EPtr)
6300      *EPtr = E;
6301  };
6302
6303  auto ReturnSequential = [&]() {
6304    UpperBound = 1;
6305    return NT;
6306  };
6307
6308  switch (DirectiveKind) {
6309  case OMPD_target: {
6310    const CapturedStmt *CS = D.getInnermostCapturedStmt();
6311    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6312    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6313        CGF.getContext(), CS->getCapturedStmt());
6314    // TODO: The standard is not clear how to resolve two thread limit clauses,
6315    //       let's pick the teams one if it's present, otherwise the target one.
6316    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6317    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6318      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6319        ThreadLimitClause = TLC;
6320        if (ThreadLimitExpr) {
6321          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6322          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6323          CodeGenFunction::LexicalScope Scope(
6324              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6325          if (const auto *PreInit =
6326                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6327            for (const auto *I : PreInit->decls()) {
6328              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6329                CGF.EmitVarDecl(cast<VarDecl>(*I));
6330              } else {
6331                CodeGenFunction::AutoVarEmission Emission =
6332                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6333                CGF.EmitAutoVarCleanups(Emission);
6334              }
6335            }
6336          }
6337        }
6338      }
6339    }
6340    if (ThreadLimitClause)
6341      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6342    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6343      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6344          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6345        CS = Dir->getInnermostCapturedStmt();
6346        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6347            CGF.getContext(), CS->getCapturedStmt());
6348        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6349      }
6350      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6351        CS = Dir->getInnermostCapturedStmt();
6352        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6353      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6354        return ReturnSequential();
6355    }
6356    return NT;
6357  }
6358  case OMPD_target_teams: {
6359    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6360      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6361      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6362      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6363    }
6364    const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6367        CGF.getContext(), CS->getCapturedStmt());
6368    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6369      if (Dir->getDirectiveKind() == OMPD_distribute) {
6370        CS = Dir->getInnermostCapturedStmt();
6371        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372      }
6373    }
6374    return NT;
6375  }
6376  case OMPD_target_teams_distribute:
6377    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6378      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6381    }
6382    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6383                  UpperBoundOnly, CondVal);
6384    return NT;
6385  case OMPD_target_teams_loop:
6386  case OMPD_target_parallel_loop:
6387  case OMPD_target_parallel:
6388  case OMPD_target_parallel_for:
6389  case OMPD_target_parallel_for_simd:
6390  case OMPD_target_teams_distribute_parallel_for:
6391  case OMPD_target_teams_distribute_parallel_for_simd: {
6392    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6393      const OMPIfClause *IfClause = nullptr;
6394      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6395        if (C->getNameModifier() == OMPD_unknown ||
6396            C->getNameModifier() == OMPD_parallel) {
6397          IfClause = C;
6398          break;
6399        }
6400      }
6401      if (IfClause) {
6402        const Expr *Cond = IfClause->getCondition();
6403        bool Result;
6404        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6405          if (!Result)
6406            return ReturnSequential();
6407        } else {
6408          CodeGenFunction::RunCleanupsScope Scope(CGF);
6409          *CondVal = CGF.EvaluateExprAsBool(Cond);
6410        }
6411      }
6412    }
6413    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6414      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6415      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6416      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6417    }
6418    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6419      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6420      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6421      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6422      return NumThreadsClause->getNumThreads();
6423    }
6424    return NT;
6425  }
6426  case OMPD_target_teams_distribute_simd:
6427  case OMPD_target_simd:
6428    return ReturnSequential();
6429  default:
6430    break;
6431  }
6432  llvm_unreachable("Unsupported directive kind.");
6433}
6434
6435llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6436    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6437  llvm::Value *NumThreadsVal = nullptr;
6438  llvm::Value *CondVal = nullptr;
6439  llvm::Value *ThreadLimitVal = nullptr;
6440  const Expr *ThreadLimitExpr = nullptr;
6441  int32_t UpperBound = -1;
6442
6443  const Expr *NT = getNumThreadsExprForTargetDirective(
6444      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6445      &ThreadLimitExpr);
6446
6447  // Thread limit expressions are used below, emit them.
6448  if (ThreadLimitExpr) {
6449    ThreadLimitVal =
6450        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6451    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6452                                               /*isSigned=*/false);
6453  }
6454
6455  // Generate the num teams expression.
6456  if (UpperBound == 1) {
6457    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6458  } else if (NT) {
6459    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6460    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6461                                              /*isSigned=*/false);
6462  } else if (ThreadLimitVal) {
6463    // If we do not have a num threads value but a thread limit, replace the
6464    // former with the latter. We know handled the thread limit expression.
6465    NumThreadsVal = ThreadLimitVal;
6466    ThreadLimitVal = nullptr;
6467  } else {
6468    // Default to "0" which means runtime choice.
6469    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6470    NumThreadsVal = CGF.Builder.getInt32(0);
6471  }
6472
6473  // Handle if clause. If if clause present, the number of threads is
6474  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6475  if (CondVal) {
6476    CodeGenFunction::RunCleanupsScope Scope(CGF);
6477    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6478                                             CGF.Builder.getInt32(1));
6479  }
6480
6481  // If the thread limit and num teams expression were present, take the
6482  // minimum.
6483  if (ThreadLimitVal) {
6484    NumThreadsVal = CGF.Builder.CreateSelect(
6485        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6486        ThreadLimitVal, NumThreadsVal);
6487  }
6488
6489  return NumThreadsVal;
6490}
6491
6492namespace {
6493LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6494
6495// Utility to handle information from clauses associated with a given
6496// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6497// It provides a convenient interface to obtain the information and generate
6498// code for that information.
6499class MappableExprsHandler {
6500public:
6501  /// Get the offset of the OMP_MAP_MEMBER_OF field.
6502  static unsigned getFlagMemberOffset() {
6503    unsigned Offset = 0;
6504    for (uint64_t Remain =
6505             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6506                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6507         !(Remain & 1); Remain = Remain >> 1)
6508      Offset++;
6509    return Offset;
6510  }
6511
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when only the declaration is known.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Declaration being mapped (may be null for 'this' captures -- TODO
    /// confirm against callers).
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Original map-clause expression, or null if there is none.
    const Expr *getMapExpr() const { return MapExpr; }
  };
6528
6529  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6530  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6531  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6532  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6533  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6534  using MapNonContiguousArrayTy =
6535      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6536  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6537  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6538
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    // Original clause expressions, one per generated entry (used for the
    // runtime debug information carried by MappingExprInfo).
    MapExprsArrayTy Exprs;
    // User-defined mapper declaration per entry (presumably null when the
    // entry has no mapper -- confirm at fill sites).
    MapValueDeclsArrayTy Mappers;
    // Declarations associated with device-pointer entries.
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo, keeping the parallel arrays (including
    /// those of the OpenMPIRBuilder base class) in sync.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
6556
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mapping entries collected before the struct itself is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped element: (field index, address of the element).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: (field index, address of the element).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct.
    Address Base = Address::invalid();
    /// NOTE(review): presumably the lower-bound address of the mapped range
    /// -- confirm at the use sites.
    Address LB = Address::invalid();
    /// Whether the highest element mapped is an array section.
    bool IsArraySection = false;
    /// Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
6572
6573private:
  /// Information for one mappable-expression component list, i.e. a single
  /// map/motion clause item together with its modifiers. (The previous
  /// comment, "Kind that defines how a device pointer has to be returned",
  /// appeared stale -- only the ReturnDevicePointer flag relates to that.)
  struct MapInfo {
    /// Components of the mappable expression (base, members, subscripts...).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers (always, close, present, ompx_hold, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers for to/from clauses (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether a device pointer has to be returned for this entry.
    bool ReturnDevicePointer = false;
    /// Whether the mapping was generated implicitly rather than written by
    /// the user.
    bool IsImplicit = false;
    /// User-defined mapper for this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original clause expression, if available.
    const Expr *VarRef = nullptr;
    /// Presumably distinguishes use_device_addr/has_device_addr from the
    /// pointer forms -- confirm at construction sites.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6600
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression whose device address/pointer should be produced.
    const Expr *IE = nullptr;
    /// Declaration named by the clause.
    const ValueDecl *VD = nullptr;
    /// Presumably true for use_device_addr, false for use_device_ptr --
    /// confirm at construction sites.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6613
6614  /// The target directive from where the mappable clauses were extracted. It
6615  /// is either a executable directive or a user-defined mapper directive.
6616  llvm::PointerUnion<const OMPExecutableDirective *,
6617                     const OMPDeclareMapperDecl *>
6618      CurDir;
6619
6620  /// Function the directive is being generated for.
6621  CodeGenFunction &CGF;
6622
6623  /// Set of all first private variables in the current directive.
6624  /// bool data is set to true if the variable is implicitly marked as
6625  /// firstprivate, false otherwise.
6626  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6627
6628  /// Map between device pointer declarations and their expression components.
6629  /// The key value for declarations in 'this' is null.
6630  llvm::DenseMap<
6631      const ValueDecl *,
6632      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6633      DevPointersMap;
6634
6635  /// Map between device addr declarations and their expression components.
6636  /// The key value for declarations in 'this' is null.
6637  llvm::DenseMap<
6638      const ValueDecl *,
6639      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6640      HasDevAddrsMap;
6641
6642  /// Map between lambda declarations and their map type.
6643  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6644
  /// Compute the runtime size in bytes of the entity designated by \p E,
  /// handling OpenMP array shaping expressions and array sections in addition
  /// to ordinary typed expressions. May emit IR (scalar expressions,
  /// conversions, multiplications) into the current insertion point.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        // Convert each extent to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from either the pointee type or the array
      // element type of the section's base.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Only a lower bound: size = total base size - lb * element size,
      // clamped to zero below so an out-of-range lb cannot underflow.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    // Ordinary expression: simply the size of its (canonical) type.
    return CGF.getTypeSize(ExprTy);
  }
6719
6720  /// Return the corresponding bits for a given map clause modifier. Add
6721  /// a flag marking the map as a pointer if requested. Add a flag marking the
6722  /// map as the first one of a series of maps that relate to the same map
6723  /// expression.
6724  OpenMPOffloadMappingFlags getMapTypeBits(
6725      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6726      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6727      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6728    OpenMPOffloadMappingFlags Bits =
6729        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6730                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6731    switch (MapType) {
6732    case OMPC_MAP_alloc:
6733    case OMPC_MAP_release:
6734      // alloc and release is the default behavior in the runtime library,  i.e.
6735      // if we don't pass any bits alloc/release that is what the runtime is
6736      // going to do. Therefore, we don't need to signal anything for these two
6737      // type modifiers.
6738      break;
6739    case OMPC_MAP_to:
6740      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6741      break;
6742    case OMPC_MAP_from:
6743      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6744      break;
6745    case OMPC_MAP_tofrom:
6746      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6747              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6748      break;
6749    case OMPC_MAP_delete:
6750      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6751      break;
6752    case OMPC_MAP_unknown:
6753      llvm_unreachable("Unexpected map type!");
6754    }
6755    if (AddPtrFlag)
6756      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6757    if (AddIsTargetParamFlag)
6758      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6759    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6760      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6761    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6762      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6763    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6764        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6765      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6766    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6767      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6768    if (IsNonContiguous)
6769      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6770    return Bits;
6771  }
6772
6773  /// Return true if the provided expression is a final array section. A
6774  /// final array section, is one whose length can't be proved to be one.
6775  bool isFinalArraySectionExpression(const Expr *E) const {
6776    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6777
6778    // It is not an array section and therefore not a unity-size one.
6779    if (!OASE)
6780      return false;
6781
6782    // An array section with no colon always refer to a single element.
6783    if (OASE->getColonLocFirst().isInvalid())
6784      return false;
6785
6786    const Expr *Length = OASE->getLength();
6787
6788    // If we don't have a length we have to check if the array has size 1
6789    // for this dimension. Also, we should always expect a length if the
6790    // base type is pointer.
6791    if (!Length) {
6792      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
6793                             OASE->getBase()->IgnoreParenImpCasts())
6794                             .getCanonicalType();
6795      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6796        return ATy->getSize().getSExtValue() != 1;
6797      // If we don't have a constant dimension length, we have to consider
6798      // the current section as having any size, so it is not necessarily
6799      // unitary. If it happen to be unity size, that's user fault.
6800      return true;
6801    }
6802
6803    // Check if the length evaluates to 1.
6804    Expr::EvalResult Result;
6805    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6806      return true; // Can have more that size 1.
6807
6808    llvm::APSInt ConstLength = Result.Val.getInt();
6809    return ConstLength.getSExtValue() != 1;
6810  }
6811
6812  /// Generate the base pointers, section pointers, sizes, map type bits, and
6813  /// user-defined mappers (all included in \a CombinedInfo) for the provided
6814  /// map type, map or motion modifiers, and expression components.
6815  /// \a IsFirstComponent should be set to true if the provided set of
6816  /// components is the first associated with a capture.
6817  void generateInfoForComponentList(
6818      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6819      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6820      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6821      MapCombinedInfoTy &CombinedInfo,
6822      MapCombinedInfoTy &StructBaseCombinedInfo,
6823      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6824      bool IsImplicit, bool GenerateAllInfoForClauses,
6825      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6826      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6827      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6828          OverlappedElements = std::nullopt) const {
6829    // The following summarizes what has to be generated for each map and the
6830    // types below. The generated information is expressed in this order:
6831    // base pointer, section pointer, size, flags
6832    // (to add to the ones that come from the map type and modifier).
6833    //
6834    // double d;
6835    // int i[100];
6836    // float *p;
6837    // int **a = &i;
6838    //
6839    // struct S1 {
6840    //   int i;
6841    //   float f[50];
6842    // }
6843    // struct S2 {
6844    //   int i;
6845    //   float f[50];
6846    //   S1 s;
6847    //   double *p;
6848    //   struct S2 *ps;
6849    //   int &ref;
6850    // }
6851    // S2 s;
6852    // S2 *ps;
6853    //
6854    // map(d)
6855    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6856    //
6857    // map(i)
6858    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6859    //
6860    // map(i[1:23])
6861    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6862    //
6863    // map(p)
6864    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6865    //
6866    // map(p[1:24])
6867    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6868    // in unified shared memory mode or for local pointers
6869    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6870    //
6871    // map((*a)[0:3])
6872    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6873    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6874    //
6875    // map(**a)
6876    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6877    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6878    //
6879    // map(s)
6880    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6881    //
6882    // map(s.i)
6883    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6884    //
6885    // map(s.s.f)
6886    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6887    //
6888    // map(s.p)
6889    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6890    //
6891    // map(to: s.p[:22])
6892    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6893    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6894    // &(s.p), &(s.p[0]), 22*sizeof(double),
6895    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6896    // (*) alloc space for struct members, only this is a target parameter
6897    // (**) map the pointer (nothing to be mapped in this example) (the compiler
6898    //      optimizes this entry out, same in the examples below)
6899    // (***) map the pointee (map: to)
6900    //
6901    // map(to: s.ref)
6902    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6903    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6904    // (*) alloc space for struct members, only this is a target parameter
6905    // (**) map the pointer (nothing to be mapped in this example) (the compiler
6906    //      optimizes this entry out, same in the examples below)
6907    // (***) map the pointee (map: to)
6908    //
6909    // map(s.ps)
6910    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6911    //
6912    // map(from: s.ps->s.i)
6913    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6914    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6915    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
6916    //
6917    // map(to: s.ps->ps)
6918    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6919    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6920    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
6921    //
6922    // map(s.ps->ps->ps)
6923    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6924    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6925    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6926    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6927    //
6928    // map(to: s.ps->ps->s.f[:22])
6929    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6932    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6933    //
6934    // map(ps)
6935    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6936    //
6937    // map(ps->i)
6938    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6939    //
6940    // map(ps->s.f)
6941    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6942    //
6943    // map(from: ps->p)
6944    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6945    //
6946    // map(to: ps->p[:22])
6947    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6948    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6949    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6950    //
6951    // map(ps->ps)
6952    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6953    //
6954    // map(from: ps->ps->s.i)
6955    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6956    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6957    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6958    //
6959    // map(from: ps->ps->ps)
6960    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6961    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6962    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6963    //
6964    // map(ps->ps->ps->ps)
6965    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6966    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6967    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6968    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6969    //
6970    // map(to: ps->ps->ps->s.f[:22])
6971    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6974    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6975    //
6976    // map(to: s.f[:22]) map(from: s.p[:33])
6977    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6978    //     sizeof(double*) (**), TARGET_PARAM
6979    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6980    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6981    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6982    // (**) allocate contiguous space needed to fit all mapped members even if
6983    //     we allocate space for members not mapped (in this example,
6984    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
6985    //     them as well because they fall between &s.f[0] and &s.p)
6986    //
6987    // map(from: s.f[:22]) map(to: ps->p[:33])
6988    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
6989    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
6990    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
6991    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
6992    // (*) the struct this entry pertains to is the 2nd element in the list of
6993    //     arguments, hence MEMBER_OF(2)
6994    //
6995    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
6996    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
6997    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
6998    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
6999    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7000    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7001    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7002    // (*) the struct this entry pertains to is the 4th element in the list
7003    //     of arguments, hence MEMBER_OF(4)
7004
7005    // Track if the map information being generated is the first for a capture.
7006    bool IsCaptureFirstInfo = IsFirstComponentList;
7007    // When the variable is on a declare target link or in a to clause with
7008    // unified memory, a reference is needed to hold the host/device address
7009    // of the variable.
7010    bool RequiresReference = false;
7011
7012    // Scan the components from the base to the complete expression.
7013    auto CI = Components.rbegin();
7014    auto CE = Components.rend();
7015    auto I = CI;
7016
7017    // Track if the map information being generated is the first for a list of
7018    // components.
7019    bool IsExpressionFirstInfo = true;
7020    bool FirstPointerInComplexData = false;
7021    Address BP = Address::invalid();
7022    const Expr *AssocExpr = I->getAssociatedExpression();
7023    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7024    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7025    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7026
7027    if (isa<MemberExpr>(AssocExpr)) {
7028      // The base is the 'this' pointer. The content of the pointer is going
7029      // to be the base of the field being mapped.
7030      BP = CGF.LoadCXXThisAddress();
7031    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7032               (OASE &&
7033                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7034      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7035    } else if (OAShE &&
7036               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7037      BP = Address(
7038          CGF.EmitScalarExpr(OAShE->getBase()),
7039          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7040          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7041    } else {
7042      // The base is the reference to the variable.
7043      // BP = &Var.
7044      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7045      if (const auto *VD =
7046              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7047        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7048                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7049          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7050              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7051                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7052               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7053            RequiresReference = true;
7054            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7055          }
7056        }
7057      }
7058
7059      // If the variable is a pointer and is being dereferenced (i.e. is not
7060      // the last component), the base has to be the pointer itself, not its
7061      // reference. References are ignored for mapping purposes.
7062      QualType Ty =
7063          I->getAssociatedDeclaration()->getType().getNonReferenceType();
7064      if (Ty->isAnyPointerType() && std::next(I) != CE) {
7065        // No need to generate individual map information for the pointer, it
7066        // can be associated with the combined storage if shared memory mode is
7067        // active or the base declaration is not global variable.
7068        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7069        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7070            !VD || VD->hasLocalStorage())
7071          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7072        else
7073          FirstPointerInComplexData = true;
7074        ++I;
7075      }
7076    }
7077
7078    // Track whether a component of the list should be marked as MEMBER_OF some
7079    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7080    // in a component list should be marked as MEMBER_OF, all subsequent entries
7081    // do not belong to the base struct. E.g.
7082    // struct S2 s;
7083    // s.ps->ps->ps->f[:]
7084    //   (1) (2) (3) (4)
7085    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7086    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7087    // is the pointee of ps(2) which is not member of struct s, so it should not
7088    // be marked as such (it is still PTR_AND_OBJ).
7089    // The variable is initialized to false so that PTR_AND_OBJ entries which
7090    // are not struct members are not considered (e.g. array of pointers to
7091    // data).
7092    bool ShouldBeMemberOf = false;
7093
7094    // Variable keeping track of whether or not we have encountered a component
7095    // in the component list which is a member expression. Useful when we have a
7096    // pointer or a final array section, in which case it is the previous
7097    // component in the list which tells us whether we have a member expression.
7098    // E.g. X.f[:]
7099    // While processing the final array section "[:]" it is "f" which tells us
7100    // whether we are dealing with a member of a declared struct.
7101    const MemberExpr *EncounteredME = nullptr;
7102
7103    // Track the total number of dimensions. Start from one for the dummy
7104    // dimension.
7105    uint64_t DimSize = 1;
7106
7107    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7108    bool IsPrevMemberReference = false;
7109
7110    // We need to check if we will be encountering any MEs. If we do not
7111    // encounter any ME expression it means we will be mapping the whole struct.
7112    // In that case we need to skip adding an entry for the struct to the
7113    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7114    // list only when generating all info for clauses.
7115    bool IsMappingWholeStruct = true;
7116    if (!GenerateAllInfoForClauses) {
7117      IsMappingWholeStruct = false;
7118    } else {
7119      for (auto TempI = I; TempI != CE; ++TempI) {
7120        const MemberExpr *PossibleME =
7121            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7122        if (PossibleME) {
7123          IsMappingWholeStruct = false;
7124          break;
7125        }
7126      }
7127    }
7128
7129    for (; I != CE; ++I) {
7130      // If the current component is member of a struct (parent struct) mark it.
7131      if (!EncounteredME) {
7132        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7133        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7134        // as MEMBER_OF the parent struct.
7135        if (EncounteredME) {
7136          ShouldBeMemberOf = true;
7137          // Do not emit as complex pointer if this is actually not array-like
7138          // expression.
7139          if (FirstPointerInComplexData) {
7140            QualType Ty = std::prev(I)
7141                              ->getAssociatedDeclaration()
7142                              ->getType()
7143                              .getNonReferenceType();
7144            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7145            FirstPointerInComplexData = false;
7146          }
7147        }
7148      }
7149
7150      auto Next = std::next(I);
7151
7152      // We need to generate the addresses and sizes if this is the last
7153      // component, if the component is a pointer or if it is an array section
7154      // whose length can't be proved to be one. If this is a pointer, it
7155      // becomes the base address for the following components.
7156
7157      // A final array section is one whose length can't be proved to be one.
7158      // If the map item is non-contiguous then we don't treat any array section
7159      // as final array section.
7160      bool IsFinalArraySection =
7161          !IsNonContiguous &&
7162          isFinalArraySectionExpression(I->getAssociatedExpression());
7163
7164      // If we have a declaration for the mapping use that, otherwise use
7165      // the base declaration of the map clause.
7166      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7167                                     ? I->getAssociatedDeclaration()
7168                                     : BaseDecl;
7169      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7170                                               : MapExpr;
7171
7172      // Get information on whether the element is a pointer. Have to do a
7173      // special treatment for array sections given that they are built-in
7174      // types.
7175      const auto *OASE =
7176          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7177      const auto *OAShE =
7178          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7179      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7180      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7181      bool IsPointer =
7182          OAShE ||
7183          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7184                       .getCanonicalType()
7185                       ->isAnyPointerType()) ||
7186          I->getAssociatedExpression()->getType()->isAnyPointerType();
7187      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7188                               MapDecl &&
7189                               MapDecl->getType()->isLValueReferenceType();
7190      bool IsNonDerefPointer = IsPointer &&
7191                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7192                               !IsNonContiguous;
7193
7194      if (OASE)
7195        ++DimSize;
7196
7197      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7198          IsFinalArraySection) {
7199        // If this is not the last component, we expect the pointer to be
7200        // associated with an array expression or member expression.
7201        assert((Next == CE ||
7202                isa<MemberExpr>(Next->getAssociatedExpression()) ||
7203                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7204                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7205                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7206                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7207                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7208               "Unexpected expression");
7209
7210        Address LB = Address::invalid();
7211        Address LowestElem = Address::invalid();
7212        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7213                                       const MemberExpr *E) {
7214          const Expr *BaseExpr = E->getBase();
7215          // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7216          // scalar.
7217          LValue BaseLV;
7218          if (E->isArrow()) {
7219            LValueBaseInfo BaseInfo;
7220            TBAAAccessInfo TBAAInfo;
7221            Address Addr =
7222                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7223            QualType PtrTy = BaseExpr->getType()->getPointeeType();
7224            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7225          } else {
7226            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7227          }
7228          return BaseLV;
7229        };
7230        if (OAShE) {
7231          LowestElem = LB =
7232              Address(CGF.EmitScalarExpr(OAShE->getBase()),
7233                      CGF.ConvertTypeForMem(
7234                          OAShE->getBase()->getType()->getPointeeType()),
7235                      CGF.getContext().getTypeAlignInChars(
7236                          OAShE->getBase()->getType()));
7237        } else if (IsMemberReference) {
7238          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7239          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7240          LowestElem = CGF.EmitLValueForFieldInitialization(
7241                              BaseLVal, cast<FieldDecl>(MapDecl))
7242                           .getAddress(CGF);
7243          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7244                   .getAddress(CGF);
7245        } else {
7246          LowestElem = LB =
7247              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7248                  .getAddress(CGF);
7249        }
7250
7251        // If this component is a pointer inside the base struct then we don't
7252        // need to create any entry for it - it will be combined with the object
7253        // it is pointing to into a single PTR_AND_OBJ entry.
7254        bool IsMemberPointerOrAddr =
7255            EncounteredME &&
7256            (((IsPointer || ForDeviceAddr) &&
7257              I->getAssociatedExpression() == EncounteredME) ||
7258             (IsPrevMemberReference && !IsPointer) ||
7259             (IsMemberReference && Next != CE &&
7260              !Next->getAssociatedExpression()->getType()->isPointerType()));
7261        if (!OverlappedElements.empty() && Next == CE) {
7262          // Handle base element with the info for overlapped elements.
7263          assert(!PartialStruct.Base.isValid() && "The base element is set.");
7264          assert(!IsPointer &&
7265                 "Unexpected base element with the pointer type.");
7266          // Mark the whole struct as the struct that requires allocation on the
7267          // device.
7268          PartialStruct.LowestElem = {0, LowestElem};
7269          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7270              I->getAssociatedExpression()->getType());
7271          Address HB = CGF.Builder.CreateConstGEP(
7272              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7273                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7274              TypeSize.getQuantity() - 1);
7275          PartialStruct.HighestElem = {
7276              std::numeric_limits<decltype(
7277                  PartialStruct.HighestElem.first)>::max(),
7278              HB};
7279          PartialStruct.Base = BP;
7280          PartialStruct.LB = LB;
7281          assert(
7282              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7283              "Overlapped elements must be used only once for the variable.");
7284          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7285          // Emit data for non-overlapped data.
7286          OpenMPOffloadMappingFlags Flags =
7287              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7288              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7289                             /*AddPtrFlag=*/false,
7290                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7291          llvm::Value *Size = nullptr;
7292          // Do bitcopy of all non-overlapped structure elements.
7293          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7294                   Component : OverlappedElements) {
7295            Address ComponentLB = Address::invalid();
7296            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7297                 Component) {
7298              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7299                const auto *FD = dyn_cast<FieldDecl>(VD);
7300                if (FD && FD->getType()->isLValueReferenceType()) {
7301                  const auto *ME =
7302                      cast<MemberExpr>(MC.getAssociatedExpression());
7303                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7304                  ComponentLB =
7305                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7306                          .getAddress(CGF);
7307                } else {
7308                  ComponentLB =
7309                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7310                          .getAddress(CGF);
7311                }
7312                Size = CGF.Builder.CreatePtrDiff(
7313                    CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
7314                break;
7315              }
7316            }
7317            assert(Size && "Failed to determine structure size");
7318            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7319            CombinedInfo.BasePointers.push_back(BP.getPointer());
7320            CombinedInfo.DevicePtrDecls.push_back(nullptr);
7321            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7322            CombinedInfo.Pointers.push_back(LB.getPointer());
7323            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7324                Size, CGF.Int64Ty, /*isSigned=*/true));
7325            CombinedInfo.Types.push_back(Flags);
7326            CombinedInfo.Mappers.push_back(nullptr);
7327            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7328                                                                      : 1);
7329            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7330          }
7331          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7332          CombinedInfo.BasePointers.push_back(BP.getPointer());
7333          CombinedInfo.DevicePtrDecls.push_back(nullptr);
7334          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7335          CombinedInfo.Pointers.push_back(LB.getPointer());
7336          Size = CGF.Builder.CreatePtrDiff(
7337              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7338              LB.getPointer());
7339          CombinedInfo.Sizes.push_back(
7340              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7341          CombinedInfo.Types.push_back(Flags);
7342          CombinedInfo.Mappers.push_back(nullptr);
7343          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7344                                                                    : 1);
7345          break;
7346        }
7347        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7348        // Skip adding an entry in the CurInfo of this combined entry if the
7349        // whole struct is currently being mapped. The struct needs to be added
7350        // in the first position before any data internal to the struct is being
7351        // mapped.
7352        if (!IsMemberPointerOrAddr ||
7353            (Next == CE && MapType != OMPC_MAP_unknown)) {
7354          if (!IsMappingWholeStruct) {
7355            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7356            CombinedInfo.BasePointers.push_back(BP.getPointer());
7357            CombinedInfo.DevicePtrDecls.push_back(nullptr);
7358            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7359            CombinedInfo.Pointers.push_back(LB.getPointer());
7360            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7361                Size, CGF.Int64Ty, /*isSigned=*/true));
7362            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7363                                                                      : 1);
7364          } else {
7365            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7366            StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
7367            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7368            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7369            StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
7370            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7371                Size, CGF.Int64Ty, /*isSigned=*/true));
7372            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7373                IsNonContiguous ? DimSize : 1);
7374          }
7375
7376          // If Mapper is valid, the last component inherits the mapper.
7377          bool HasMapper = Mapper && Next == CE;
7378          if (!IsMappingWholeStruct)
7379            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7380          else
7381            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7382                                                               : nullptr);
7383
7384          // We need to add a pointer flag for each map that comes from the
7385          // same expression except for the first one. We also need to signal
7386          // this map is the first one that relates with the current capture
7387          // (there is a set of entries for each capture).
7388          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7389              MapType, MapModifiers, MotionModifiers, IsImplicit,
7390              !IsExpressionFirstInfo || RequiresReference ||
7391                  FirstPointerInComplexData || IsMemberReference,
7392              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7393
7394          if (!IsExpressionFirstInfo || IsMemberReference) {
7395            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7396            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7397            if (IsPointer || (IsMemberReference && Next != CE))
7398              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7399                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7400                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7401                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7402                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7403
7404            if (ShouldBeMemberOf) {
7405              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7406              // should be later updated with the correct value of MEMBER_OF.
7407              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7408              // From now on, all subsequent PTR_AND_OBJ entries should not be
7409              // marked as MEMBER_OF.
7410              ShouldBeMemberOf = false;
7411            }
7412          }
7413
7414          if (!IsMappingWholeStruct)
7415            CombinedInfo.Types.push_back(Flags);
7416          else
7417            StructBaseCombinedInfo.Types.push_back(Flags);
7418        }
7419
7420        // If we have encountered a member expression so far, keep track of the
7421        // mapped member. If the parent is "*this", then the value declaration
7422        // is nullptr.
7423        if (EncounteredME) {
7424          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7425          unsigned FieldIndex = FD->getFieldIndex();
7426
7427          // Update info about the lowest and highest elements for this struct
7428          if (!PartialStruct.Base.isValid()) {
7429            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7430            if (IsFinalArraySection) {
7431              Address HB =
7432                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7433                      .getAddress(CGF);
7434              PartialStruct.HighestElem = {FieldIndex, HB};
7435            } else {
7436              PartialStruct.HighestElem = {FieldIndex, LowestElem};
7437            }
7438            PartialStruct.Base = BP;
7439            PartialStruct.LB = BP;
7440          } else if (FieldIndex < PartialStruct.LowestElem.first) {
7441            PartialStruct.LowestElem = {FieldIndex, LowestElem};
7442          } else if (FieldIndex > PartialStruct.HighestElem.first) {
7443            if (IsFinalArraySection) {
7444              Address HB =
7445                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7446                      .getAddress(CGF);
7447              PartialStruct.HighestElem = {FieldIndex, HB};
7448            } else {
7449              PartialStruct.HighestElem = {FieldIndex, LowestElem};
7450            }
7451          }
7452        }
7453
7454        // Need to emit combined struct for array sections.
7455        if (IsFinalArraySection || IsNonContiguous)
7456          PartialStruct.IsArraySection = true;
7457
7458        // If we have a final array section, we are done with this expression.
7459        if (IsFinalArraySection)
7460          break;
7461
7462        // The pointer becomes the base for the next element.
7463        if (Next != CE)
7464          BP = IsMemberReference ? LowestElem : LB;
7465
7466        IsExpressionFirstInfo = false;
7467        IsCaptureFirstInfo = false;
7468        FirstPointerInComplexData = false;
7469        IsPrevMemberReference = IsMemberReference;
7470      } else if (FirstPointerInComplexData) {
7471        QualType Ty = Components.rbegin()
7472                          ->getAssociatedDeclaration()
7473                          ->getType()
7474                          .getNonReferenceType();
7475        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7476        FirstPointerInComplexData = false;
7477      }
7478    }
7479    // If ran into the whole component - allocate the space for the whole
7480    // record.
7481    if (!EncounteredME)
7482      PartialStruct.HasCompleteRecord = true;
7483
7484    if (!IsNonContiguous)
7485      return;
7486
7487    const ASTContext &Context = CGF.getContext();
7488
7489    // For supporting stride in array section, we need to initialize the first
7490    // dimension size as 1, first offset as 0, and first count as 1
7491    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7492    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7493    MapValuesArrayTy CurStrides;
7494    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7495    uint64_t ElementTypeSize;
7496
7497    // Collect Size information for each dimension and get the element size as
7498    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7499    // should be [10, 10] and the first stride is 4 btyes.
7500    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7501         Components) {
7502      const Expr *AssocExpr = Component.getAssociatedExpression();
7503      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7504
7505      if (!OASE)
7506        continue;
7507
7508      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7509      auto *CAT = Context.getAsConstantArrayType(Ty);
7510      auto *VAT = Context.getAsVariableArrayType(Ty);
7511
7512      // We need all the dimension size except for the last dimension.
7513      assert((VAT || CAT || &Component == &*Components.begin()) &&
7514             "Should be either ConstantArray or VariableArray if not the "
7515             "first Component");
7516
7517      // Get element size if CurStrides is empty.
7518      if (CurStrides.empty()) {
7519        const Type *ElementType = nullptr;
7520        if (CAT)
7521          ElementType = CAT->getElementType().getTypePtr();
7522        else if (VAT)
7523          ElementType = VAT->getElementType().getTypePtr();
7524        else
7525          assert(&Component == &*Components.begin() &&
7526                 "Only expect pointer (non CAT or VAT) when this is the "
7527                 "first Component");
7528        // If ElementType is null, then it means the base is a pointer
7529        // (neither CAT nor VAT) and we'll attempt to get ElementType again
7530        // for next iteration.
7531        if (ElementType) {
7532          // For the case that having pointer as base, we need to remove one
7533          // level of indirection.
7534          if (&Component != &*Components.begin())
7535            ElementType = ElementType->getPointeeOrArrayElementType();
7536          ElementTypeSize =
7537              Context.getTypeSizeInChars(ElementType).getQuantity();
7538          CurStrides.push_back(
7539              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7540        }
7541      }
7542      // Get dimension value except for the last dimension since we don't need
7543      // it.
7544      if (DimSizes.size() < Components.size() - 1) {
7545        if (CAT)
7546          DimSizes.push_back(llvm::ConstantInt::get(
7547              CGF.Int64Ty, CAT->getSize().getZExtValue()));
7548        else if (VAT)
7549          DimSizes.push_back(CGF.Builder.CreateIntCast(
7550              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7551              /*IsSigned=*/false));
7552      }
7553    }
7554
7555    // Skip the dummy dimension since we have already have its information.
7556    auto *DI = DimSizes.begin() + 1;
7557    // Product of dimension.
7558    llvm::Value *DimProd =
7559        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7560
7561    // Collect info for non-contiguous. Notice that offset, count, and stride
7562    // are only meaningful for array-section, so we insert a null for anything
7563    // other than array-section.
7564    // Also, the size of offset, count, and stride are not the same as
7565    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7566    // count, and stride are the same as the number of non-contiguous
7567    // declaration in target update to/from clause.
7568    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7569         Components) {
7570      const Expr *AssocExpr = Component.getAssociatedExpression();
7571
7572      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7573        llvm::Value *Offset = CGF.Builder.CreateIntCast(
7574            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7575            /*isSigned=*/false);
7576        CurOffsets.push_back(Offset);
7577        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7578        CurStrides.push_back(CurStrides.back());
7579        continue;
7580      }
7581
7582      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7583
7584      if (!OASE)
7585        continue;
7586
7587      // Offset
7588      const Expr *OffsetExpr = OASE->getLowerBound();
7589      llvm::Value *Offset = nullptr;
7590      if (!OffsetExpr) {
7591        // If offset is absent, then we just set it to zero.
7592        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7593      } else {
7594        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7595                                           CGF.Int64Ty,
7596                                           /*isSigned=*/false);
7597      }
7598      CurOffsets.push_back(Offset);
7599
7600      // Count
7601      const Expr *CountExpr = OASE->getLength();
7602      llvm::Value *Count = nullptr;
7603      if (!CountExpr) {
7604        // In Clang, once a high dimension is an array section, we construct all
7605        // the lower dimension as array section, however, for case like
7606        // arr[0:2][2], Clang construct the inner dimension as an array section
7607        // but it actually is not in an array section form according to spec.
7608        if (!OASE->getColonLocFirst().isValid() &&
7609            !OASE->getColonLocSecond().isValid()) {
7610          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7611        } else {
7612          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
7615          // dimension.
7616          const Expr *StrideExpr = OASE->getStride();
7617          llvm::Value *Stride =
7618              StrideExpr
7619                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7620                                              CGF.Int64Ty, /*isSigned=*/false)
7621                  : nullptr;
7622          if (Stride)
7623            Count = CGF.Builder.CreateUDiv(
7624                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7625          else
7626            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7627        }
7628      } else {
7629        Count = CGF.EmitScalarExpr(CountExpr);
7630      }
7631      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7632      CurCounts.push_back(Count);
7633
7634      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7635      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7636      //              Offset      Count     Stride
7637      //    D0          0           1         4    (int)    <- dummy dimension
7638      //    D1          0           2         8    (2 * (1) * 4)
7639      //    D2          1           2         20   (1 * (1 * 5) * 4)
7640      //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
7641      const Expr *StrideExpr = OASE->getStride();
7642      llvm::Value *Stride =
7643          StrideExpr
7644              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7645                                          CGF.Int64Ty, /*isSigned=*/false)
7646              : nullptr;
7647      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7648      if (Stride)
7649        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7650      else
7651        CurStrides.push_back(DimProd);
7652      if (DI != DimSizes.end())
7653        ++DI;
7654    }
7655
7656    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7657    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7658    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7659  }
7660
7661  /// Return the adjusted map modifiers if the declaration a capture refers to
7662  /// appears in a first-private clause. This is expected to be used only with
7663  /// directives that start with 'target'.
7664  OpenMPOffloadMappingFlags
7665  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7666    assert(Cap.capturesVariable() && "Expected capture by reference only!");
7667
7668    // A first private variable captured by reference will use only the
7669    // 'private ptr' and 'map to' flag. Return the right flags if the captured
7670    // declaration is known as first-private in this handler.
7671    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7672      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7673        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7674               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7675      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7676             OpenMPOffloadMappingFlags::OMP_MAP_TO;
7677    }
7678    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7679    if (I != LambdasMap.end())
7680      // for map(to: lambda): using user specified map type.
7681      return getMapTypeBits(
7682          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7683          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7684          /*AddPtrFlag=*/false,
7685          /*AddIsTargetParamFlag=*/false,
7686          /*isNonContiguous=*/false);
7687    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7688           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7689  }
7690
7691  void getPlainLayout(const CXXRecordDecl *RD,
7692                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7693                      bool AsBase) const {
7694    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7695
7696    llvm::StructType *St =
7697        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7698
7699    unsigned NumElements = St->getNumElements();
7700    llvm::SmallVector<
7701        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7702        RecordLayout(NumElements);
7703
7704    // Fill bases.
7705    for (const auto &I : RD->bases()) {
7706      if (I.isVirtual())
7707        continue;
7708      const auto *Base = I.getType()->getAsCXXRecordDecl();
7709      // Ignore empty bases.
7710      if (Base->isEmpty() || CGF.getContext()
7711                                 .getASTRecordLayout(Base)
7712                                 .getNonVirtualSize()
7713                                 .isZero())
7714        continue;
7715
7716      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7717      RecordLayout[FieldIndex] = Base;
7718    }
7719    // Fill in virtual bases.
7720    for (const auto &I : RD->vbases()) {
7721      const auto *Base = I.getType()->getAsCXXRecordDecl();
7722      // Ignore empty bases.
7723      if (Base->isEmpty())
7724        continue;
7725      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7726      if (RecordLayout[FieldIndex])
7727        continue;
7728      RecordLayout[FieldIndex] = Base;
7729    }
7730    // Fill in all the fields.
7731    assert(!RD->isUnion() && "Unexpected union.");
7732    for (const auto *Field : RD->fields()) {
7733      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7734      // will fill in later.)
7735      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7736        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7737        RecordLayout[FieldIndex] = Field;
7738      }
7739    }
7740    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7741             &Data : RecordLayout) {
7742      if (Data.isNull())
7743        continue;
7744      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7745        getPlainLayout(Base, Layout, /*AsBase=*/true);
7746      else
7747        Layout.push_back(Data.get<const FieldDecl *>());
7748    }
7749  }
7750
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param Clauses all clauses of the directive; only map/to/from and
  ///        use_device_ptr/use_device_addr clauses are consumed here.
  /// \param CombinedInfo [out] accumulates the generated mapping information.
  /// \param OMPBuilder used when emitting the combined entry for a partially
  ///        mapped struct.
  /// \param SkipVarSet declarations whose component lists must be ignored.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // Buckets are processed in order: 'present' entries first, then 'alloc',
    // then everything else; 'Total' is only the number of buckets.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses. Appends one MapInfo record for declaration \p D into the
    // bucket selected by \p Kind (creating the per-declaration bucket array
    // on first use), unless \p D is in SkipVarSet.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    // Collect component lists from 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // The expression is only used for diagnostics/var-ref tracking; it is
        // absent for clauses without a valid location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    // Collect component lists from 'to' clauses (treated as map-to).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Collect component lists from 'from' clauses (treated as map-from).
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user fault if that
    // was not mapped before. If there is no map information and the pointer is
    // a struct member, then we defer the emission of that entry until the whole
    // struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Append a RETURN_PARAM entry (device pointer/address \p Ptr for \p VD)
    // to the separate use_device data list; it is merged into CombinedInfo at
    // the very end.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    // Handle a use_device_ptr/use_device_addr list item for which no existing
    // map entry was found.
    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    // Returns true if an eligible existing map entry for \p VD was found; as
    // a side effect that entry is flagged to return the device pointer.
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    // Same handling for use_device_addr; 'Processed' avoids emitting a
    // declaration twice when it appears in several clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    // Emit mapping info for each declaration, one bucket at a time (Present,
    // Allocs, Other — the order MapKind defines).
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work on
            // the first new entry added to it i.e. make sure that when multiple
            // values are added to any of the lists, the first value added is
            // being modified by the assignments below (not the last value
            // added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
8138
8139public:
8140  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8141      : CurDir(&Dir), CGF(CGF) {
8142    // Extract firstprivate clause information.
8143    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8144      for (const auto *D : C->varlists())
8145        FirstPrivateDecls.try_emplace(
8146            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8147    // Extract implicit firstprivates from uses_allocators clauses.
8148    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8149      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8150        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8151        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8152          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8153                                        /*Implicit=*/true);
8154        else if (const auto *VD = dyn_cast<VarDecl>(
8155                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8156                         ->getDecl()))
8157          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8158      }
8159    }
8160    // Extract device pointer clause information.
8161    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8162      for (auto L : C->component_lists())
8163        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8164    // Extract device addr clause information.
8165    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8166      for (auto L : C->component_lists())
8167        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8168    // Extract map information.
8169    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8170      if (C->getMapType() != OMPC_MAP_to)
8171        continue;
8172      for (auto L : C->component_lists()) {
8173        const ValueDecl *VD = std::get<0>(L);
8174        const auto *RD = VD ? VD->getType()
8175                                  .getCanonicalType()
8176                                  .getNonReferenceType()
8177                                  ->getAsCXXRecordDecl()
8178                            : nullptr;
8179        if (RD && RD->isLambda())
8180          LambdasMap.try_emplace(std::get<0>(L), C);
8181      }
8182    }
8183  }
8184
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause information is pre-extracted here;
  /// mapper clauses are walked on demand in generateAllInfoForMapper.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8188
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo [out] Parallel arrays the combined entry is appended
  ///        to.
  /// \param CurTypes Map flags of the member entries already generated for
  ///        this struct; updated in place (MEMBER_OF / TARGET_PARAM /
  ///        OMPX_HOLD adjustments).
  /// \param PartialStruct Base address plus lowest/highest mapped member,
  ///        recorded while the individual members were emitted.
  /// \param IsMapThis True if the struct being mapped is the 'this' object.
  /// \param OMPBuilder Used to compute and patch the MEMBER_OF flag bits.
  /// \param VD The mapped declaration (null when mapping 'this').
  /// \param NotTargetParams If false, the combined entry is flagged as a
  ///        TARGET_PARAM kernel argument.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not MEMBER_OF anything and not an array section
    // needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a fully mapped record both bounds collapse to the record base.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      // Size of the whole object pointed to by 'this'.
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8287
8288  /// Generate all the base pointers, section pointers, sizes, map types, and
8289  /// mappers for the extracted mappable expressions (all included in \a
8290  /// CombinedInfo). Also, for each item that relates with a device pointer, a
8291  /// pair of the relevant declaration and index where it occurs is appended to
8292  /// the device pointers info array.
8293  void generateAllInfo(
8294      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8295      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8296          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8297    assert(CurDir.is<const OMPExecutableDirective *>() &&
8298           "Expect a executable directive");
8299    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8300    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8301                              SkipVarSet);
8302  }
8303
8304  /// Generate all the base pointers, section pointers, sizes, map types, and
8305  /// mappers for the extracted map clauses of user-defined mapper (all included
8306  /// in \a CombinedInfo).
8307  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8308                                llvm::OpenMPIRBuilder &OMPBuilder) const {
8309    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8310           "Expect a declare mapper directive");
8311    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8312    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8313                              OMPBuilder);
8314  }
8315
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// \param VD The captured variable of lambda (closure) type.
  /// \param Arg The kernel argument holding the lambda object's address.
  /// \param CombinedInfo [out] Receives one PTR_AND_OBJ entry per capture.
  /// \param LambdaPointers [out] Maps each capture field address back to the
  ///        lambda object address; consumed later by
  ///        adjustMemberOfForLambdaCaptures to patch MEMBER_OF indices.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closures are handled here.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map a captured 'this', if the lambda captured it.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // MEMBER_OF here is a placeholder; it is rewritten later in
      // adjustMemberOfForLambdaCaptures.
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      // Only by-reference captures and captured pointers need entries.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: size is the size of the referenced variable.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: record the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8392
8393  /// Set correct indices for lambdas captures.
8394  void adjustMemberOfForLambdaCaptures(
8395      llvm::OpenMPIRBuilder &OMPBuilder,
8396      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8397      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8398      MapFlagsArrayTy &Types) const {
8399    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8400      // Set correct member_of idx for all implicit lambda captures.
8401      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8402                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8403                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8404                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8405        continue;
8406      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8407      assert(BasePtr && "Unable to find base lambda address.");
8408      int TgtIdx = -1;
8409      for (unsigned J = I; J > 0; --J) {
8410        unsigned Idx = J - 1;
8411        if (Pointers[Idx] != BasePtr)
8412          continue;
8413        TgtIdx = Idx;
8414        break;
8415      }
8416      assert(TgtIdx != -1 && "Unable to find parent lambda.");
8417      // All other current entries will be MEMBER_OF the combined entry
8418      // (except for PTR_AND_OBJ entries which do not have a placeholder value
8419      // 0xFFFF in the MEMBER_OF field).
8420      OpenMPOffloadMappingFlags MemberOfFlag =
8421          OMPBuilder.getMemberOfFlag(TgtIdx);
8422      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8423    }
8424  }
8425
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// \param Cap The capture of the target region: either 'this' or a
  ///        captured variable.
  /// \param Arg The kernel argument value associated with the capture.
  /// \param CombinedInfo [out] Parallel arrays receiving the map entries.
  /// \param PartialStruct [out] Filled with struct range information when
  ///        individual members of the captured aggregate are mapped; the
  ///        caller emits the combined entry from it.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      // The entry covers just the pointer value itself (void* size).
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // Each MapData bundles one component list with the map kind/modifiers,
    // implicitness, mapper decl, and the variable reference expression.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImpicit = */ true, nullptr,
                                        nullptr);
    // has_device_addr member fields are treated as implicit tofrom maps.
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImpicit = */ true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list for this declaration from the directive's
    // map clauses.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Order the lists: entries with the 'present' modifier come first, and
    // entries with map type 'alloc' come last (note the LHS/RHS swap in the
    // comparator: HasAllocs tests RHS, HasAllocsR tests LHS).
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists; lists are walked back-to-front
    // so matching suffixes (shared bases) are detected first.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          // The shorter list (the base) owns the overlap; the longer list is
          // recorded as an overlapped sub-component of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Layout holds the record's fields in declaration order (flattened across
    // bases for C++ records) and is used to order fields from different
    // parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common suffix (shared base components).
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field: order by field position.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different parents: whichever appears first in the
            // flattened record layout is less.
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
8683
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one map entry is appended to \a CombinedInfo: 'this' is mapped
  /// tofrom, by-copy captures are passed as literals (zero-size for
  /// pointers), and by-reference captures get the element type's size with
  /// modifiers derived from private clauses.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    // Default maps are implicit unless a firstprivate clause says otherwise.
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      // Size is that of the object 'this' points to.
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on this variable decides implicitness.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: map the pointer value loaded through the
        // reference rather than the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
8765};
8766} // anonymous namespace
8767
8768// Try to extract the base declaration from a `this->x` expression if possible.
8769static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8770  if (!E)
8771    return nullptr;
8772
8773  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
8774    if (const MemberExpr *ME =
8775            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8776      return ME->getMemberDecl();
8777  return nullptr;
8778}
8779
8780/// Emit a string constant containing the names of the values mapped to the
8781/// offloading runtime library.
8782llvm::Constant *
8783emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8784                       MappableExprsHandler::MappingExprInfo &MapExprs) {
8785
8786  uint32_t SrcLocStrSize;
8787  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8788    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8789
8790  SourceLocation Loc;
8791  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8792    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8793      Loc = VD->getLocation();
8794    else
8795      Loc = MapExprs.getMapExpr()->getExprLoc();
8796  } else {
8797    Loc = MapExprs.getMapDecl()->getLocation();
8798  }
8799
8800  std::string ExprName;
8801  if (MapExprs.getMapExpr()) {
8802    PrintingPolicy P(CGF.getContext().getLangOpts());
8803    llvm::raw_string_ostream OS(ExprName);
8804    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8805    OS.flush();
8806  } else {
8807    ExprName = MapExprs.getMapDecl()->getNameAsString();
8808  }
8809
8810  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8811  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8812                                         PLoc.getLine(), PLoc.getColumn(),
8813                                         SrcLocStrSize);
8814}
8815
8816/// Emit the arrays used to pass the captures and map information to the
8817/// offloading runtime library. If there is no map or capture information,
8818/// return nullptr by reference.
8819static void emitOffloadingArrays(
8820    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8821    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8822    bool IsNonContiguous = false) {
8823  CodeGenModule &CGM = CGF.CGM;
8824
8825  // Reset the array information.
8826  Info.clearArrayInfo();
8827  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8828
8829  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8830  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8831                         CGF.AllocaInsertPt->getIterator());
8832  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8833                          CGF.Builder.GetInsertPoint());
8834
8835  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8836    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8837  };
8838  if (CGM.getCodeGenOpts().getDebugInfo() !=
8839      llvm::codegenoptions::NoDebugInfo) {
8840    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8841    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8842                    FillInfoMap);
8843  }
8844
8845  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8846    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8847      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8848    }
8849  };
8850
8851  auto CustomMapperCB = [&](unsigned int I) {
8852    llvm::Value *MFunc = nullptr;
8853    if (CombinedInfo.Mappers[I]) {
8854      Info.HasMapper = true;
8855      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8856          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8857    }
8858    return MFunc;
8859  };
8860  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8861                                  /*IsNonContiguous=*/true, DeviceAddrCB,
8862                                  CustomMapperCB);
8863}
8864
/// Check for inner distribute directive.
///
/// Walks the body of the target-entry directive \p D looking for a nested
/// distribute-kind directive (possibly wrapped in a nested 'teams'
/// directive). Returns the nested directive when found, nullptr otherwise.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a single compound statement to the lone child, if any.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  // Only an executable OpenMP directive can be the nested distribute.
  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    // Dispatch on the *outer* directive's kind.
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      // A nested 'teams' may itself wrap the distribute directive; look one
      // level deeper.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    // These combined target forms cannot contain a nested distribute at this
    // level.
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // Any other outer directive kind indicates a caller error: this helper is
    // only meant for target-entry directives.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8975
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Nothing to do if the mapper function was already emitted.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it gets
  // privatized below to refer to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper id.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Remove OptimizeNone so this synthesized function can be optimized.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // Tracks the last emitted block of the loop body; it provides the back-edge
  // for the pointer PHI below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position so it can be added to each
  // component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Component names are only emitted when debug info is enabled.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The tofrom case branches to EndBB directly from ToElseBB with the
    // combined map type unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function and, when emitted inside another function,
  // remember the association for later cleanup/tracking.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9270
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init runs when (size > 1 || (base != begin && IsPtrAndObj)) and delete
    // is NOT requested.
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only for array sections when delete IS requested.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9346
9347llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9348    const OMPDeclareMapperDecl *D) {
9349  auto I = UDMMap.find(D);
9350  if (I != UDMMap.end())
9351    return I->second;
9352  emitUserDefinedMapper(D);
9353  return UDMMap.lookup(D);
9354}
9355
9356llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9357    CodeGenFunction &CGF, const OMPExecutableDirective &D,
9358    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9359                                     const OMPLoopDirective &D)>
9360        SizeEmitter) {
9361  OpenMPDirectiveKind Kind = D.getDirectiveKind();
9362  const OMPExecutableDirective *TD = &D;
9363  // Get nested teams distribute kind directive, if any.
9364  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9365      Kind != OMPD_target_teams_loop)
9366    TD = getNestedDistributeDirective(CGM.getContext(), D);
9367  if (!TD)
9368    return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9369
9370  const auto *LD = cast<OMPLoopDirective>(TD);
9371  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9372    return NumIterations;
9373  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9374}
9375
9376static void
9377emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9378                       const OMPExecutableDirective &D,
9379                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9380                       bool RequiresOuterTask, const CapturedStmt &CS,
9381                       bool OffloadingMandatory, CodeGenFunction &CGF) {
9382  if (OffloadingMandatory) {
9383    CGF.Builder.CreateUnreachable();
9384  } else {
9385    if (RequiresOuterTask) {
9386      CapturedVars.clear();
9387      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9388    }
9389    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9390                                         CapturedVars);
9391  }
9392}
9393
9394static llvm::Value *emitDeviceID(
9395    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9396    CodeGenFunction &CGF) {
9397  // Emit device ID if any.
9398  llvm::Value *DeviceID;
9399  if (Device.getPointer()) {
9400    assert((Device.getInt() == OMPC_DEVICE_unknown ||
9401            Device.getInt() == OMPC_DEVICE_device_num) &&
9402           "Expected device_num modifier.");
9403    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9404    DeviceID =
9405        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9406  } else {
9407    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9408  }
9409  return DeviceID;
9410}
9411
9412llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9413                               CodeGenFunction &CGF) {
9414  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9415
9416  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9417    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9418    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9419        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9420    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9421                                             /*isSigned=*/false);
9422  }
9423  return DynCGroupMem;
9424}
9425
/// Emit the device launch of a target region: gather map information for each
/// captured variable, materialize the offloading argument arrays, and emit
/// the kernel-launch sequence (with a host fallback callback) through the
/// OpenMPIRBuilder. \p MapTypesArray and \p MapNamesArray are output
/// parameters filled from the generated offloading info.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  // Walk the captures, the captured-record fields, and the captured values in
  // lockstep; they were built in the same order.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      // Record the captured declaration (nullptr stands for 'this') so the
      // later generateAllInfo call can skip already-mapped variables.
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  // Map names are only materialized when debug info is requested.
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  // Publish the generated arrays through the caller-provided slots so that a
  // possible task-based emission below can reference them.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Code generation for the actual launch; may run inside an outer task
  // region (see below), hence the deferred lambda.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    // Host fallback invoked by the IR builder when the device launch fails.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  // Emit the launch either wrapped in a task (depend/nowait/... clauses) or
  // as an inlined region.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9602
9603static void
9604emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9605                   const OMPExecutableDirective &D,
9606                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9607                   bool RequiresOuterTask, const CapturedStmt &CS,
9608                   bool OffloadingMandatory, CodeGenFunction &CGF) {
9609
9610  // Notify that the host version must be executed.
9611  auto &&ElseGen =
9612      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9613       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9614        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9615                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
9616      };
9617
9618  if (RequiresOuterTask) {
9619    CodeGenFunction::OMPTargetDataInfo InputInfo;
9620    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9621  } else {
9622    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9623  }
9624}
9625
/// Emit a call to a target region: capture the region's variables, then emit
/// either the kernel launch (possibly guarded by an 'if' clause) or the host
/// fallback, depending on whether a device function ID is available.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading is mandatory only for host compilations built with
  // -fopenmp-offload-mandatory.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // Clauses that attach task semantics to the target construct force the
  // launch to be emitted inside an outer task region.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Capture the variables referenced by the region before emitting either
  // branch; both branches consume CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled by emitTargetCallKernelLaunch; shared with the task-based path.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // Offloading branch: launch the device kernel.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // Host branch: run the outlined host version of the region.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9694
/// Recursively scan statement \p S for OpenMP target execution directives and
/// emit a device function for each one found, using \p ParentName for the
/// kernel name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // combined/composite directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach this switch (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target directives: recurse into the raw associated statement, if any,
  // to find target regions nested inside.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9850
9851static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9852  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9853      OMPDeclareTargetDeclAttr::getDeviceType(VD);
9854  if (!DevTy)
9855    return false;
9856  // Do not emit device_type(nohost) functions for the host.
9857  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9858    return true;
9859  // Do not emit device_type(host) functions for the device.
9860  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9861    return true;
9862  return false;
9863}
9864
9865bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9866  // If emitting code for the host, we do not process FD here. Instead we do
9867  // the normal code generation.
9868  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9869    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9870      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9871                                  CGM.getLangOpts().OpenMPIsTargetDevice))
9872        return true;
9873    return false;
9874  }
9875
9876  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9877  // Try to detect target regions in the function.
9878  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9879    StringRef Name = CGM.getMangledName(GD);
9880    scanForTargetRegionsFunctions(FD->getBody(), Name);
9881    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9882                                CGM.getLangOpts().OpenMPIsTargetDevice))
9883      return true;
9884  }
9885
9886  // Do not to emit function if it is not marked as declare target.
9887  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9888         AlreadyEmittedTargetDecls.count(VD) == 0;
9889}
9890
/// Decide whether the variable behind \p GD should be skipped (returns true)
/// or go through normal emission (returns false). On the device this also
/// scans the variable's constructors/destructor for target regions and defers
/// link / unified-shared-memory declare-target variables.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip declarations whose device_type excludes this compilation side.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, global variables are always emitted normally.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'link' variables, and 'to'/'enter' variables under requires
  // unified_shared_memory, are deferred: they are handled later by
  // emitDeferredTargetDecls.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
9929
9930void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9931                                                   llvm::Constant *Addr) {
9932  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9933      !CGM.getLangOpts().OpenMPIsTargetDevice)
9934    return;
9935
9936  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9937      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9938
9939  // If this is an 'extern' declaration we defer to the canonical definition and
9940  // do not emit an offloading entry.
9941  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9942      VD->hasExternalStorage())
9943    return;
9944
9945  if (!Res) {
9946    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9947      // Register non-target variables being emitted in device code (debug info
9948      // may cause this).
9949      StringRef VarName = CGM.getMangledName(VD);
9950      EmittedNonTargetVariables.try_emplace(VarName, Addr);
9951    }
9952    return;
9953  }
9954
9955  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9956  auto LinkageForVariable = [&VD, this]() {
9957    return CGM.getLLVMLinkageVarDefinition(VD);
9958  };
9959
9960  std::vector<llvm::GlobalVariable *> GeneratedRefs;
9961  OMPBuilder.registerTargetGlobalVariable(
9962      convertCaptureClause(VD), convertDeviceClause(VD),
9963      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9964      VD->isExternallyVisible(),
9965      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9966                                  VD->getCanonicalDecl()->getBeginLoc()),
9967      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9968      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9969      CGM.getTypes().ConvertTypeForMem(
9970          CGM.getContext().getPointerType(VD->getType())),
9971      Addr);
9972
9973  for (auto *ref : GeneratedRefs)
9974    CGM.addCompilerUsedGlobal(ref);
9975}
9976
9977bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9978  if (isa<FunctionDecl>(GD.getDecl()) ||
9979      isa<OMPDeclareReductionDecl>(GD.getDecl()))
9980    return emitTargetFunctions(GD);
9981
9982  return emitTargetGlobalVariable(GD);
9983}
9984
9985void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9986  for (const VarDecl *VD : DeferredGlobalVariables) {
9987    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9988        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9989    if (!Res)
9990      continue;
9991    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9992         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9993        !HasRequiresUnifiedSharedMemory) {
9994      CGM.EmitGlobal(VD);
9995    } else {
9996      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9997              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9998                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9999               HasRequiresUnifiedSharedMemory)) &&
10000             "Expected link clause or to clause with unified memory.");
10001      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10002    }
10003  }
10004}
10005
10006void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10007    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10008  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10009         " Expected target-based directive.");
10010}
10011
10012void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10013  for (const OMPClause *Clause : D->clauselists()) {
10014    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10015      HasRequiresUnifiedSharedMemory = true;
10016      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10017    } else if (const auto *AC =
10018                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10019      switch (AC->getAtomicDefaultMemOrderKind()) {
10020      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10021        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10022        break;
10023      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10024        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10025        break;
10026      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10027        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10028        break;
10029      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10030        break;
10031      }
10032    }
10033  }
10034}
10035
/// Return the default atomic ordering established by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10039
10040bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10041                                                       LangAS &AS) {
10042  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10043    return false;
10044  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10045  switch(A->getAllocatorType()) {
10046  case OMPAllocateDeclAttr::OMPNullMemAlloc:
10047  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10048  // Not supported, fallback to the default mem space.
10049  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10050  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10051  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10052  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10053  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10054  case OMPAllocateDeclAttr::OMPConstMemAlloc:
10055  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10056    AS = LangAS::Default;
10057    return true;
10058  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10059    llvm_unreachable("Expected predefined allocator for the variables with the "
10060                     "static storage.");
10061  }
10062  return false;
10063}
10064
/// Return true if a 'requires unified_shared_memory' directive was seen in
/// this compilation unit (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10068
10069CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10070    CodeGenModule &CGM)
10071    : CGM(CGM) {
10072  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10073    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10074    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10075  }
10076}
10077
/// Restore the ShouldMarkAsGlobal flag saved by the constructor (device
/// compilations only, matching the constructor's condition).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10082
/// Record \p GD as a global target function on the device, returning true
/// when it should NOT be (re-)emitted.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only device compilations with auto-marking enabled participate
  // (ShouldMarkAsGlobal is cleared by DisableAutoDeclareTargetRAII).
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Not recorded yet: emit only if no definition exists in the module so
      // far (a mere declaration means the body is still pending).
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target: record it; a failed insert means it was already
  // marked, so skip re-emission.
  return !AlreadyEmittedTargetDecls.insert(D).second;
}
10102
/// Create the host-side constructor-like function that registers the
/// 'requires' flags with the offloading runtime (__tgt_register_requires).
/// Returns nullptr when no registration is needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Build a nullary global-init-style function named
    // "omp_offloading.requires_reg" whose body is a single runtime call.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10140
10141void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10142                                    const OMPExecutableDirective &D,
10143                                    SourceLocation Loc,
10144                                    llvm::Function *OutlinedFn,
10145                                    ArrayRef<llvm::Value *> CapturedVars) {
10146  if (!CGF.HaveInsertPoint())
10147    return;
10148
10149  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10150  CodeGenFunction::RunCleanupsScope Scope(CGF);
10151
10152  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10153  llvm::Value *Args[] = {
10154      RTLoc,
10155      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10156      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10157  llvm::SmallVector<llvm::Value *, 16> RealArgs;
10158  RealArgs.append(std::begin(Args), std::end(Args));
10159  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10160
10161  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10162      CGM.getModule(), OMPRTL___kmpc_fork_teams);
10163  CGF.EmitRuntimeCall(RTLFn, RealArgs);
10164}
10165
/// Emits a call to __kmpc_push_num_teams carrying the 'num_teams' and
/// 'thread_limit' clause values. A null clause expression is lowered to an
/// explicit 0.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions (if present) and truncate/extend them to
  // the i32 the runtime expects.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
10194
10195void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10196                                            const Expr *ThreadLimit,
10197                                            SourceLocation Loc) {
10198  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10199  llvm::Value *ThreadLimitVal =
10200      ThreadLimit
10201          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10202                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
10203          : CGF.Builder.getInt32(0);
10204
10205  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10206  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10207                                    ThreadLimitVal};
10208  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10209                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10210                      ThreadLimitArgs);
10211}
10212
/// Lowers a 'target data' region via OMPBuilder.createTargetData, supplying
/// callbacks for map-info generation, body emission (with/without
/// privatization), device-address capture, and user-defined mappers.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // Evaluate the 'if' clause condition, if any.
  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the IR builder to collect the map clause info at the
  // given insertion point.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    // Only materialize mapping names when debug info is requested.
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Callback that emits the region body; the builder may invoke it several
  // times with different privatization modes.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      // Privatizing emission, only when device addresses were captured.
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      // No device addresses captured: emit without privatization.
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the new value produced for each use_device_ptr/addr decl.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  // Returns the mapper function for component I, creating it on demand.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  // Delegate the actual begin/end mapper calls to the OpenMPIRBuilder.
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
10313
/// Lowers the standalone data-movement directives (target enter data,
/// target exit data, target update) into the corresponding
/// __tgt_target_data_* runtime call, honoring 'if', 'device', 'nowait' and
/// 'depend' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // ThenGen emits the actual runtime call; it is either inlined or wrapped
  // in a task by TargetThenGen below.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are rejected by the assertion at the top of
    // this function; enumerate them explicitly so new kinds trigger a
    // -Wswitch warning rather than silently reaching llvm_unreachable.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays, fills InputInfo, then dispatches to
  // ThenGen either directly or through a target task.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' or 'nowait' requires wrapping the call in an outer task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // 'if' clause: emit the call only on the 'then' path; otherwise emit it
  // unconditionally.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10495
namespace {
/// Kind of parameter in a function with 'declare simd' directive. The
/// enumerators correspond to the parameter-class letters produced by
/// mangleVectorParameters below.
enum ParamKindTy {
  Linear,     // mangled as 'l'
  LinearRef,  // mangled as 'R'
  LinearUVal, // mangled as 'U'
  LinearVal,  // mangled as 'L'
  Uniform,    // mangled as 'u'
  Vector,     // mangled as 'v'
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Parameter class; defaults to vector.
  ParamKindTy Kind = Vector;
  // Linear step, or (with HasVarStride) the value mangled after 's' — see
  // mangleVectorParameters.
  llvm::APSInt StrideOrArg;
  // Alignment value; mangled as 'a<N>' when non-zero.
  llvm::APSInt Alignment;
  // True when the stride is not a compile-time constant step.
  bool HasVarStride = false;
};
} // namespace
10514
10515static unsigned evaluateCDTSize(const FunctionDecl *FD,
10516                                ArrayRef<ParamAttrTy> ParamAttrs) {
10517  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10518  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10519  // of that clause. The VLEN value must be power of 2.
10520  // In other case the notion of the function`s "characteristic data type" (CDT)
10521  // is used to compute the vector length.
10522  // CDT is defined in the following order:
10523  //   a) For non-void function, the CDT is the return type.
10524  //   b) If the function has any non-uniform, non-linear parameters, then the
10525  //   CDT is the type of the first such parameter.
10526  //   c) If the CDT determined by a) or b) above is struct, union, or class
10527  //   type which is pass-by-value (except for the type that maps to the
10528  //   built-in complex data type), the characteristic data type is int.
10529  //   d) If none of the above three cases is applicable, the CDT is int.
10530  // The VLEN is then determined based on the CDT and the size of vector
10531  // register of that ISA for which current vector version is generated. The
10532  // VLEN is computed using the formula below:
10533  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10534  // where vector register size specified in section 3.2.1 Registers and the
10535  // Stack Frame of original AMD64 ABI document.
10536  QualType RetType = FD->getReturnType();
10537  if (RetType.isNull())
10538    return 0;
10539  ASTContext &C = FD->getASTContext();
10540  QualType CDT;
10541  if (!RetType.isNull() && !RetType->isVoidType()) {
10542    CDT = RetType;
10543  } else {
10544    unsigned Offset = 0;
10545    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10546      if (ParamAttrs[Offset].Kind == Vector)
10547        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10548      ++Offset;
10549    }
10550    if (CDT.isNull()) {
10551      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10552        if (ParamAttrs[I + Offset].Kind == Vector) {
10553          CDT = FD->getParamDecl(I)->getType();
10554          break;
10555        }
10556      }
10557    }
10558  }
10559  if (CDT.isNull())
10560    CDT = C.IntTy;
10561  CDT = CDT->getCanonicalTypeUnqualified();
10562  if (CDT->isRecordType() || CDT->isUnionType())
10563    CDT = C.IntTy;
10564  return C.getTypeSize(CDT);
10565}
10566
10567/// Mangle the parameter part of the vector function name according to
10568/// their OpenMP classification. The mangling function is defined in
10569/// section 4.5 of the AAVFABI(2021Q1).
10570static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10571  SmallString<256> Buffer;
10572  llvm::raw_svector_ostream Out(Buffer);
10573  for (const auto &ParamAttr : ParamAttrs) {
10574    switch (ParamAttr.Kind) {
10575    case Linear:
10576      Out << 'l';
10577      break;
10578    case LinearRef:
10579      Out << 'R';
10580      break;
10581    case LinearUVal:
10582      Out << 'U';
10583      break;
10584    case LinearVal:
10585      Out << 'L';
10586      break;
10587    case Uniform:
10588      Out << 'u';
10589      break;
10590    case Vector:
10591      Out << 'v';
10592      break;
10593    }
10594    if (ParamAttr.HasVarStride)
10595      Out << "s" << ParamAttr.StrideOrArg;
10596    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10597             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10598      // Don't print the step value if it is not present or if it is
10599      // equal to 1.
10600      if (ParamAttr.StrideOrArg < 0)
10601        Out << 'n' << -ParamAttr.StrideOrArg;
10602      else if (ParamAttr.StrideOrArg != 1)
10603        Out << ParamAttr.StrideOrArg;
10604    }
10605
10606    if (!!ParamAttr.Alignment)
10607      Out << 'a' << ParamAttr.Alignment;
10608  }
10609
10610  return std::string(Out.str());
10611}
10612
10613static void
10614emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10615                           const llvm::APSInt &VLENVal,
10616                           ArrayRef<ParamAttrTy> ParamAttrs,
10617                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
10618  struct ISADataTy {
10619    char ISA;
10620    unsigned VecRegSize;
10621  };
10622  ISADataTy ISAData[] = {
10623      {
10624          'b', 128
10625      }, // SSE
10626      {
10627          'c', 256
10628      }, // AVX
10629      {
10630          'd', 256
10631      }, // AVX2
10632      {
10633          'e', 512
10634      }, // AVX512
10635  };
10636  llvm::SmallVector<char, 2> Masked;
10637  switch (State) {
10638  case OMPDeclareSimdDeclAttr::BS_Undefined:
10639    Masked.push_back('N');
10640    Masked.push_back('M');
10641    break;
10642  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10643    Masked.push_back('N');
10644    break;
10645  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10646    Masked.push_back('M');
10647    break;
10648  }
10649  for (char Mask : Masked) {
10650    for (const ISADataTy &Data : ISAData) {
10651      SmallString<256> Buffer;
10652      llvm::raw_svector_ostream Out(Buffer);
10653      Out << "_ZGV" << Data.ISA << Mask;
10654      if (!VLENVal) {
10655        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10656        assert(NumElts && "Non-zero simdlen/cdtsize expected");
10657        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10658      } else {
10659        Out << VLENVal;
10660      }
10661      Out << mangleVectorParameters(ParamAttrs);
10662      Out << '_' << Fn->getName();
10663      Fn->addFnAttr(Out.str());
10664    }
10665  }
10666}
10667
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specification for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10673
10674/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10675static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10676  QT = QT.getCanonicalType();
10677
10678  if (QT->isVoidType())
10679    return false;
10680
10681  if (Kind == ParamKindTy::Uniform)
10682    return false;
10683
10684  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10685    return false;
10686
10687  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10688      !QT->isReferenceType())
10689    return false;
10690
10691  return true;
10692}
10693
10694/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10695static bool getAArch64PBV(QualType QT, ASTContext &C) {
10696  QT = QT.getCanonicalType();
10697  unsigned Size = C.getTypeSize(QT);
10698
10699  // Only scalars and complex within 16 bytes wide set PVB to true.
10700  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10701    return false;
10702
10703  if (QT->isFloatingType())
10704    return true;
10705
10706  if (QT->isIntegerType())
10707    return true;
10708
10709  if (QT->isPointerType())
10710    return true;
10711
10712  // TODO: Add support for complex types (section 3.1.2, item 2).
10713
10714  return false;
10715}
10716
10717/// Computes the lane size (LS) of a return type or of an input parameter,
10718/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10719/// TODO: Add support for references, section 3.2.1, item 1.
10720static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10721  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10722    QualType PTy = QT.getCanonicalType()->getPointeeType();
10723    if (getAArch64PBV(PTy, C))
10724      return C.getTypeSize(PTy);
10725  }
10726  if (getAArch64PBV(QT, C))
10727    return C.getTypeSize(QT);
10728
10729  return C.getTypeSize(C.getUIntPtrType());
10730}
10731
10732// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10733// signature of the scalar function, as defined in 3.2.2 of the
10734// AAVFABI.
10735static std::tuple<unsigned, unsigned, bool>
10736getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10737  QualType RetType = FD->getReturnType().getCanonicalType();
10738
10739  ASTContext &C = FD->getASTContext();
10740
10741  bool OutputBecomesInput = false;
10742
10743  llvm::SmallVector<unsigned, 8> Sizes;
10744  if (!RetType->isVoidType()) {
10745    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10746    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10747      OutputBecomesInput = true;
10748  }
10749  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10750    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10751    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10752  }
10753
10754  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10755  // The LS of a function parameter / return value can only be a power
10756  // of 2, starting from 8 bits, up to 128.
10757  assert(llvm::all_of(Sizes,
10758                      [](unsigned Size) {
10759                        return Size == 8 || Size == 16 || Size == 32 ||
10760                               Size == 64 || Size == 128;
10761                      }) &&
10762         "Invalid size");
10763
10764  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10765                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
10766                         OutputBecomesInput);
10767}
10768
10769// Function used to add the attribute. The parameter `VLEN` is
10770// templated to allow the use of "x" when targeting scalable functions
10771// for SVE.
10772template <typename T>
10773static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10774                                 char ISA, StringRef ParSeq,
10775                                 StringRef MangledName, bool OutputBecomesInput,
10776                                 llvm::Function *Fn) {
10777  SmallString<256> Buffer;
10778  llvm::raw_svector_ostream Out(Buffer);
10779  Out << Prefix << ISA << LMask << VLEN;
10780  if (OutputBecomesInput)
10781    Out << "v";
10782  Out << ParSeq << "_" << MangledName;
10783  Fn->addFnAttr(Out.str());
10784}
10785
10786// Helper function to generate the Advanced SIMD names depending on
10787// the value of the NDS when simdlen is not present.
10788static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10789                                      StringRef Prefix, char ISA,
10790                                      StringRef ParSeq, StringRef MangledName,
10791                                      bool OutputBecomesInput,
10792                                      llvm::Function *Fn) {
10793  switch (NDS) {
10794  case 8:
10795    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10796                         OutputBecomesInput, Fn);
10797    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10798                         OutputBecomesInput, Fn);
10799    break;
10800  case 16:
10801    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10802                         OutputBecomesInput, Fn);
10803    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10804                         OutputBecomesInput, Fn);
10805    break;
10806  case 32:
10807    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10808                         OutputBecomesInput, Fn);
10809    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10810                         OutputBecomesInput, Fn);
10811    break;
10812  case 64:
10813  case 128:
10814    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10815                         OutputBecomesInput, Fn);
10816    break;
10817  default:
10818    llvm_unreachable("Scalar type is too wide.");
10819  }
10820}
10821
10822/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10823static void emitAArch64DeclareSimdFunction(
10824    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10825    ArrayRef<ParamAttrTy> ParamAttrs,
10826    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10827    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10828
10829  // Get basic data for building the vector signature.
10830  const auto Data = getNDSWDS(FD, ParamAttrs);
10831  const unsigned NDS = std::get<0>(Data);
10832  const unsigned WDS = std::get<1>(Data);
10833  const bool OutputBecomesInput = std::get<2>(Data);
10834
10835  // Check the values provided via `simdlen` by the user.
10836  // 1. A `simdlen(1)` doesn't produce vector signatures,
10837  if (UserVLEN == 1) {
10838    unsigned DiagID = CGM.getDiags().getCustomDiagID(
10839        DiagnosticsEngine::Warning,
10840        "The clause simdlen(1) has no effect when targeting aarch64.");
10841    CGM.getDiags().Report(SLoc, DiagID);
10842    return;
10843  }
10844
10845  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10846  // Advanced SIMD output.
10847  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10848    unsigned DiagID = CGM.getDiags().getCustomDiagID(
10849        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10850                                    "power of 2 when targeting Advanced SIMD.");
10851    CGM.getDiags().Report(SLoc, DiagID);
10852    return;
10853  }
10854
10855  // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10856  // limits.
10857  if (ISA == 's' && UserVLEN != 0) {
10858    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10859      unsigned DiagID = CGM.getDiags().getCustomDiagID(
10860          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10861                                      "lanes in the architectural constraints "
10862                                      "for SVE (min is 128-bit, max is "
10863                                      "2048-bit, by steps of 128-bit)");
10864      CGM.getDiags().Report(SLoc, DiagID) << WDS;
10865      return;
10866    }
10867  }
10868
10869  // Sort out parameter sequence.
10870  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10871  StringRef Prefix = "_ZGV";
10872  // Generate simdlen from user input (if any).
10873  if (UserVLEN) {
10874    if (ISA == 's') {
10875      // SVE generates only a masked function.
10876      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10877                           OutputBecomesInput, Fn);
10878    } else {
10879      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10880      // Advanced SIMD generates one or two functions, depending on
10881      // the `[not]inbranch` clause.
10882      switch (State) {
10883      case OMPDeclareSimdDeclAttr::BS_Undefined:
10884        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10885                             OutputBecomesInput, Fn);
10886        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10887                             OutputBecomesInput, Fn);
10888        break;
10889      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10890        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10891                             OutputBecomesInput, Fn);
10892        break;
10893      case OMPDeclareSimdDeclAttr::BS_Inbranch:
10894        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10895                             OutputBecomesInput, Fn);
10896        break;
10897      }
10898    }
10899  } else {
10900    // If no user simdlen is provided, follow the AAVFABI rules for
10901    // generating the vector length.
10902    if (ISA == 's') {
10903      // SVE, section 3.4.1, item 1.
10904      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10905                           OutputBecomesInput, Fn);
10906    } else {
10907      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10908      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10909      // two vector names depending on the use of the clause
10910      // `[not]inbranch`.
10911      switch (State) {
10912      case OMPDeclareSimdDeclAttr::BS_Undefined:
10913        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10914                                  OutputBecomesInput, Fn);
10915        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10916                                  OutputBecomesInput, Fn);
10917        break;
10918      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10919        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10920                                  OutputBecomesInput, Fn);
10921        break;
10922      case OMPDeclareSimdDeclAttr::BS_Inbranch:
10923        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10924                                  OutputBecomesInput, Fn);
10925        break;
10926      }
10927    }
10928  }
10929}
10930
/// Emit the '#pragma omp declare simd' vector variants for \p FD applied to
/// its emitted IR function \p Fn. Only x86 and AArch64 are handled here.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Walk the whole redeclaration chain: any redeclaration may carry its own
  // 'declare simd' attributes.
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    // For methods, position 0 is reserved for the implicit 'this' parameter
    // and is keyed by the method decl itself.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One attribute slot per parameter (plus 'this' for methods).
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // 'aligneds' and 'alignments' are parallel lists: NI yields the
      // (possibly null) alignment expression for each aligned variable.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Use the user-provided alignment if present; otherwise fall back to
        // the default SIMD alignment for the parameter type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // 'linears', 'steps' and 'modifiers' are parallel lists as well.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: it references another parameter, so encode
            // that parameter's position instead of a literal stride.
            // NOTE(review): 'cast' asserts on non-DeclRefExpr steps, making
            // the null check below vacuous; presumably Sema guarantees this
            // form — confirm before changing to dyn_cast.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the user-provided simdlen, if any.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Emit the target-specific mangled vector variants ('s' = SVE,
      // 'n' = Advanced SIMD on AArch64).
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11083
11084namespace {
11085/// Cleanup action for doacross support.
11086class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11087public:
11088  static const int DoacrossFinArgs = 2;
11089
11090private:
11091  llvm::FunctionCallee RTLFn;
11092  llvm::Value *Args[DoacrossFinArgs];
11093
11094public:
11095  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11096                    ArrayRef<llvm::Value *> CallArgs)
11097      : RTLFn(RTLFn) {
11098    assert(CallArgs.size() == DoacrossFinArgs);
11099    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11100  }
11101  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11102    if (!CGF.HaveInsertPoint())
11103      return;
11104    CGF.EmitRuntimeCall(RTLFn, Args);
11105  }
11106};
11107} // namespace
11108
/// Emit the __kmpc_doacross_init call describing the loop-nest bounds, and
/// schedule the matching __kmpc_doacross_fini for region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build the kmp_dim record type once and cache it in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // Allocate a kmp_dim array with one element per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // Zero-initialize the array; in particular the 'lo' fields stay 0.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs when the region is left,
  // including via exceptions.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
11179
11180template <typename T>
11181static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11182                                const T *C, llvm::Value *ULoc,
11183                                llvm::Value *ThreadID) {
11184  QualType Int64Ty =
11185      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11186  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11187  QualType ArrayTy = CGM.getContext().getConstantArrayType(
11188      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11189  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11190  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11191    const Expr *CounterVal = C->getLoopData(I);
11192    assert(CounterVal);
11193    llvm::Value *CntVal = CGF.EmitScalarConversion(
11194        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11195        CounterVal->getExprLoc());
11196    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11197                          /*Volatile=*/false, Int64Ty);
11198  }
11199  llvm::Value *Args[] = {
11200      ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11201  llvm::FunctionCallee RTLFn;
11202  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11203  OMPDoacrossKind<T> ODK;
11204  if (ODK.isSource(C)) {
11205    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11206                                                  OMPRTL___kmpc_doacross_post);
11207  } else {
11208    assert(ODK.isSink(C) && "Expect sink modifier.");
11209    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11210                                                  OMPRTL___kmpc_doacross_wait);
11211  }
11212  CGF.EmitRuntimeCall(RTLFn, Args);
11213}
11214
11215void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11216                                          const OMPDependClause *C) {
11217  return EmitDoacrossOrdered<OMPDependClause>(
11218      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11219      getThreadID(CGF, C->getBeginLoc()));
11220}
11221
11222void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11223                                          const OMPDoacrossClause *C) {
11224  return EmitDoacrossOrdered<OMPDoacrossClause>(
11225      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11226      getThreadID(CGF, C->getBeginLoc()));
11227}
11228
11229void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11230                               llvm::FunctionCallee Callee,
11231                               ArrayRef<llvm::Value *> Args) const {
11232  assert(Loc.isValid() && "Outlined function call location must be valid.");
11233  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11234
11235  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11236    if (Fn->doesNotThrow()) {
11237      CGF.EmitNounwindRuntimeCall(Fn, Args);
11238      return;
11239    }
11240  }
11241  CGF.EmitRuntimeCall(Callee, Args);
11242}
11243
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Thin forwarder to the common call-emission helper; presumably a hook for
  // target-specific runtimes to adjust outlined calls — confirm in header.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11249
11250void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11251  if (const auto *FD = dyn_cast<FunctionDecl>(D))
11252    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11253      HasEmittedDeclareTargetRegion = true;
11254}
11255
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Base implementation: native and target parameters share storage, so the
  // native parameter's local address is returned as-is (TargetParam unused
  // here; NOTE(review): presumably overridden for offloading targets).
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11261
11262/// Return allocator value from expression, or return a null allocator (default
11263/// when no allocator specified).
11264static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11265                                    const Expr *Allocator) {
11266  llvm::Value *AllocVal;
11267  if (Allocator) {
11268    AllocVal = CGF.EmitScalarExpr(Allocator);
11269    // According to the standard, the original allocator type is a enum
11270    // (integer). Convert to pointer type, if required.
11271    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11272                                        CGF.getContext().VoidPtrTy,
11273                                        Allocator->getExprLoc());
11274  } else {
11275    // If no allocator specified, it defaults to the null allocator.
11276    AllocVal = llvm::Constant::getNullValue(
11277        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11278  }
11279  return AllocVal;
11280}
11281
11282/// Return the alignment from an allocate directive if present.
11283static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11284  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11285
11286  if (!AllocateAlignment)
11287    return nullptr;
11288
11289  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11290}
11291
/// Return the address to use for local variable \p VD, taking 'omp allocate'
/// attributes and untied-task storage relocation into account.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // If VD is a local of an untied task in the current function, its storage
  // may be kept elsewhere; pick up the recorded addresses, if any.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the byte size is only known at run time.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    // __kmpc_aligned_alloc when an explicit alignment was requested,
    // plain __kmpc_alloc otherwise.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, persist the freshly allocated pointer in the
    // task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Arguments for __kmpc_free: gtid, pointer, allocator.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    // Free the allocation when the scope is left (normal or EH path).
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11391
11392bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11393                                             const VarDecl *VD) const {
11394  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11395  if (It == FunctionToUntiedTaskStackMap.end())
11396    return false;
11397  return UntiedLocalVarsStack[It->second].count(VD) > 0;
11398}
11399
11400CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11401    CodeGenModule &CGM, const OMPLoopDirective &S)
11402    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11403  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11404  if (!NeedToPush)
11405    return;
11406  NontemporalDeclsSet &DS =
11407      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11408  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11409    for (const Stmt *Ref : C->private_refs()) {
11410      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11411      const ValueDecl *VD;
11412      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11413        VD = DRE->getDecl();
11414      } else {
11415        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11416        assert((ME->isImplicitCXXThis() ||
11417                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11418               "Expected member of current class.");
11419        VD = ME->getMemberDecl();
11420      }
11421      DS.insert(VD);
11422    }
11423  }
11424}
11425
11426CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11427  if (!NeedToPush)
11428    return;
11429  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11430}
11431
11432CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11433    CodeGenFunction &CGF,
11434    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11435                          std::pair<Address, Address>> &LocalVars)
11436    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11437  if (!NeedToPush)
11438    return;
11439  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11440      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11441  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11442}
11443
11444CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11445  if (!NeedToPush)
11446    return;
11447  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11448}
11449
11450bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11451  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11452
11453  return llvm::any_of(
11454      CGM.getOpenMPRuntime().NontemporalDeclsStack,
11455      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11456}
11457
11458void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11459    const OMPExecutableDirective &S,
11460    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11461    const {
11462  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11463  // Vars in target/task regions must be excluded completely.
11464  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11465      isOpenMPTaskingDirective(S.getDirectiveKind())) {
11466    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11467    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11468    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11469    for (const CapturedStmt::Capture &Cap : CS->captures()) {
11470      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11471        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11472    }
11473  }
11474  // Exclude vars in private clauses.
11475  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11476    for (const Expr *Ref : C->varlists()) {
11477      if (!Ref->getType()->isScalarType())
11478        continue;
11479      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11480      if (!DRE)
11481        continue;
11482      NeedToCheckForLPCs.insert(DRE->getDecl());
11483    }
11484  }
11485  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11486    for (const Expr *Ref : C->varlists()) {
11487      if (!Ref->getType()->isScalarType())
11488        continue;
11489      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11490      if (!DRE)
11491        continue;
11492      NeedToCheckForLPCs.insert(DRE->getDecl());
11493    }
11494  }
11495  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11496    for (const Expr *Ref : C->varlists()) {
11497      if (!Ref->getType()->isScalarType())
11498        continue;
11499      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11500      if (!DRE)
11501        continue;
11502      NeedToCheckForLPCs.insert(DRE->getDecl());
11503    }
11504  }
11505  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11506    for (const Expr *Ref : C->varlists()) {
11507      if (!Ref->getType()->isScalarType())
11508        continue;
11509      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11510      if (!DRE)
11511        continue;
11512      NeedToCheckForLPCs.insert(DRE->getDecl());
11513    }
11514  }
11515  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11516    for (const Expr *Ref : C->varlists()) {
11517      if (!Ref->getType()->isScalarType())
11518        continue;
11519      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11520      if (!DRE)
11521        continue;
11522      NeedToCheckForLPCs.insert(DRE->getDecl());
11523    }
11524  }
11525  for (const Decl *VD : NeedToCheckForLPCs) {
11526    for (const LastprivateConditionalData &Data :
11527         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11528      if (Data.DeclToUniqueName.count(VD) > 0) {
11529        if (!Data.Disabled)
11530          NeedToAddForLPCsAsDisabled.insert(VD);
11531        break;
11532      }
11533    }
11534  }
11535}
11536
/// Push tracking data for 'lastprivate(conditional:...)' variables of \p S
/// (popped by the destructor).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only for OpenMP >= 5.0 and only when at least one lastprivate
      // clause carries the 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Record each conditional-lastprivate variable together with a unique
    // name generated for it.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the iteration variable lvalue and the owning function.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11568
// "Disable" constructor: used for regions where the lastprivate conditional
// analysis must be suppressed for some declarations (computed by
// tryToDisableInnerAnalysis). If any declaration needs suppression, a stack
// entry marked Disabled is pushed; the destructor pops it again.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries; only the keys are
    // used to match declarations, so store empty strings.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11587
// Static factory for the "disable" form of the RAII: pushes a Disabled stack
// entry for the directive \p S if the inner analysis must be suppressed.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11593
11594CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11595  if (CGM.getLangOpts().OpenMP < 50)
11596    return;
11597  if (Action == ActionToDo::DisableLastprivateConditional) {
11598    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11599           "Expected list of disabled private vars.");
11600    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11601  }
11602  if (Action == ActionToDo::PushAsLastprivateConditional) {
11603    assert(
11604        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11605        "Expected list of lastprivate conditional vars.");
11606    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11607  }
11608}
11609
/// Creates (or reuses) the per-function temporary implementing the
/// lastprivate conditional variable \p VD. The temporary is an implicit
/// record { <VD's value type> value; char Fired; }: 'value' holds the
/// private copy, 'Fired' is set by inner regions when the variable is
/// assigned. The flag is reset to 0 here and the address of the 'value'
/// field is returned for use as the private address of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of (type, fields, base address), keyed by variable.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use for this variable: build the implicit record and allocate it.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but the
    // name leaks into the emitted IR type names, so renaming it would churn
    // existing tests — confirm before changing.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Start the region with the 'Fired' flag cleared: no update seen yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11644
11645namespace {
11646/// Checks if the lastprivate conditional variable is referenced in LHS.
11647class LastprivateConditionalRefChecker final
11648    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11649  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11650  const Expr *FoundE = nullptr;
11651  const Decl *FoundD = nullptr;
11652  StringRef UniqueDeclName;
11653  LValue IVLVal;
11654  llvm::Function *FoundFn = nullptr;
11655  SourceLocation Loc;
11656
11657public:
11658  bool VisitDeclRefExpr(const DeclRefExpr *E) {
11659    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11660         llvm::reverse(LPM)) {
11661      auto It = D.DeclToUniqueName.find(E->getDecl());
11662      if (It == D.DeclToUniqueName.end())
11663        continue;
11664      if (D.Disabled)
11665        return false;
11666      FoundE = E;
11667      FoundD = E->getDecl()->getCanonicalDecl();
11668      UniqueDeclName = It->second;
11669      IVLVal = D.IVLVal;
11670      FoundFn = D.Fn;
11671      break;
11672    }
11673    return FoundE == E;
11674  }
11675  bool VisitMemberExpr(const MemberExpr *E) {
11676    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11677      return false;
11678    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11679         llvm::reverse(LPM)) {
11680      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11681      if (It == D.DeclToUniqueName.end())
11682        continue;
11683      if (D.Disabled)
11684        return false;
11685      FoundE = E;
11686      FoundD = E->getMemberDecl()->getCanonicalDecl();
11687      UniqueDeclName = It->second;
11688      IVLVal = D.IVLVal;
11689      FoundFn = D.Fn;
11690      break;
11691    }
11692    return FoundE == E;
11693  }
11694  bool VisitStmt(const Stmt *S) {
11695    for (const Stmt *Child : S->children()) {
11696      if (!Child)
11697        continue;
11698      if (const auto *E = dyn_cast<Expr>(Child))
11699        if (!E->isGLValue())
11700          continue;
11701      if (Visit(Child))
11702        return true;
11703    }
11704    return false;
11705  }
11706  explicit LastprivateConditionalRefChecker(
11707      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11708      : LPM(LPM) {}
11709  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11710  getFoundData() const {
11711    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11712  }
11713};
11714} // namespace
11715
/// Emits the code that records the last value written to the lastprivate
/// conditional variable \p LVal together with the iteration \p IVLVal at
/// which it was written:
///
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
///
/// The internal globals <UniqueDeclName> and <UniqueDeclName>.iv are created
/// on first use. In simd-only mode the update is emitted without the critical
/// region since no parallel region can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11801
11802void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11803                                                         const Expr *LHS) {
11804  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11805    return;
11806  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11807  if (!Checker.Visit(LHS))
11808    return;
11809  const Expr *FoundE;
11810  const Decl *FoundD;
11811  StringRef UniqueDeclName;
11812  LValue IVLVal;
11813  llvm::Function *FoundFn;
11814  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11815      Checker.getFoundData();
11816  if (FoundFn != CGF.CurFn) {
11817    // Special codegen for inner parallel regions.
11818    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11819    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11820    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11821           "Lastprivate conditional is not found in outer region.");
11822    QualType StructTy = std::get<0>(It->getSecond());
11823    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11824    LValue PrivLVal = CGF.EmitLValue(FoundE);
11825    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11826        PrivLVal.getAddress(CGF),
11827        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11828        CGF.ConvertTypeForMem(StructTy));
11829    LValue BaseLVal =
11830        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11831    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11832    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11833                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11834                        FiredLVal, llvm::AtomicOrdering::Unordered,
11835                        /*IsVolatile=*/true, /*isInit=*/false);
11836    return;
11837  }
11838
11839  // Private address of the lastprivate conditional in the current context.
11840  // priv_a
11841  LValue LVal = CGF.EmitLValue(FoundE);
11842  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11843                                   FoundE->getExprLoc());
11844}
11845
11846void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11847    CodeGenFunction &CGF, const OMPExecutableDirective &D,
11848    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11849  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11850    return;
11851  auto Range = llvm::reverse(LastprivateConditionalStack);
11852  auto It = llvm::find_if(
11853      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11854  if (It == Range.end() || It->Fn != CGF.CurFn)
11855    return;
11856  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11857  assert(LPCI != LastprivateConditionalToTypes.end() &&
11858         "Lastprivates must be registered already.");
11859  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11860  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11861  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11862  for (const auto &Pair : It->DeclToUniqueName) {
11863    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11864    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11865      continue;
11866    auto I = LPCI->getSecond().find(Pair.first);
11867    assert(I != LPCI->getSecond().end() &&
11868           "Lastprivate must be rehistered already.");
11869    // bool Cmp = priv_a.Fired != 0;
11870    LValue BaseLVal = std::get<3>(I->getSecond());
11871    LValue FiredLVal =
11872        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11873    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11874    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11875    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11876    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11877    // if (Cmp) {
11878    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11879    CGF.EmitBlock(ThenBB);
11880    Address Addr = CGF.GetAddrOfLocalVar(VD);
11881    LValue LVal;
11882    if (VD->getType()->isReferenceType())
11883      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11884                                           AlignmentSource::Decl);
11885    else
11886      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11887                                AlignmentSource::Decl);
11888    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11889                                     D.getBeginLoc());
11890    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11891    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11892    // }
11893  }
11894}
11895
/// Final copy-out for a lastprivate conditional variable: if any update was
/// recorded for \p VD (i.e. the internal global exists), loads the
/// accumulated "last value" and stores it into the private copy \p PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
11915
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime (-fopenmp-simd mode).
//
// In SIMD-only mode no calls to the OpenMP runtime library may be emitted, so
// every entry point below that would require libomp support traps with
// llvm_unreachable — Sema is expected to reject (or codegen to avoid) the
// corresponding constructs before these are reached. Only the handful of
// operations that can be lowered without the runtime (simple reductions,
// target-global filtering) have real implementations.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Simple (non-runtime) reductions are the one reduction form that works
// without libomp; delegate to the base implementation for them.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// No globals are ever emitted specially for the target in SIMD-only mode:
// report that the default host codegen path should handle the declaration.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12232