1//===------ CGOpenMPRuntimeGPU.h - Interface to OpenMP GPU Runtimes ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a generalized class for OpenMP runtime code generation
10// specialized by GPU targets NVPTX and AMDGCN.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
15#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
16
17#include "CGOpenMPRuntime.h"
18#include "CodeGenFunction.h"
19#include "clang/AST/StmtOpenMP.h"
20
21namespace clang {
22namespace CodeGen {
23
24class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
25public:
26  /// Defines the execution mode.
27  enum ExecutionMode {
28    /// SPMD execution mode (all threads are worker threads).
29    EM_SPMD,
30    /// Non-SPMD execution mode (1 master thread, others are workers).
31    EM_NonSPMD,
32    /// Unknown execution mode (orphaned directive).
33    EM_Unknown,
34  };
35private:
36  /// Parallel outlined function work for workers to execute.
37  llvm::SmallVector<llvm::Function *, 16> Work;
38
39  struct EntryFunctionState {
40    SourceLocation Loc;
41  };
42
43  ExecutionMode getExecutionMode() const;
44
45  /// Get barrier to synchronize all threads in a block.
46  void syncCTAThreads(CodeGenFunction &CGF);
47
48  /// Helper for target directive initialization.
49  void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
50                      bool IsSPMD);
51
52  /// Helper for target directive finalization.
53  void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
54                        bool IsSPMD);
55
56  /// Helper for generic variables globalization prolog.
57  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
58                             bool WithSPMDCheck = false);
59
60  /// Helper for generic variables globalization epilog.
61  void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
62
63  //
64  // Base class overrides.
65  //
66
67  /// Emit outlined function specialized for the Fork-Join
68  /// programming model for applicable target directives on the NVPTX device.
69  /// \param D Directive to emit.
70  /// \param ParentName Name of the function that encloses the target region.
71  /// \param OutlinedFn Outlined function value to be defined by this call.
72  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
73  /// \param IsOffloadEntry True if the outlined function is an offload entry.
74  /// An outlined function may not be an entry if, e.g. the if clause always
75  /// evaluates to false.
76  void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
77                         llvm::Function *&OutlinedFn,
78                         llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
79                         const RegionCodeGenTy &CodeGen);
80
81  /// Emit outlined function specialized for the Single Program
82  /// Multiple Data programming model for applicable target directives on the
83  /// NVPTX device.
84  /// \param D Directive to emit.
85  /// \param ParentName Name of the function that encloses the target region.
86  /// \param OutlinedFn Outlined function value to be defined by this call.
87  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
88  /// \param IsOffloadEntry True if the outlined function is an offload entry.
89  /// \param CodeGen Object containing the target statements.
90  /// An outlined function may not be an entry if, e.g. the if clause always
91  /// evaluates to false.
92  void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
93                      llvm::Function *&OutlinedFn,
94                      llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
95                      const RegionCodeGenTy &CodeGen);
96
97  /// Emit outlined function for 'target' directive on the NVPTX
98  /// device.
99  /// \param D Directive to emit.
100  /// \param ParentName Name of the function that encloses the target region.
101  /// \param OutlinedFn Outlined function value to be defined by this call.
102  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
103  /// \param IsOffloadEntry True if the outlined function is an offload entry.
104  /// An outlined function may not be an entry if, e.g. the if clause always
105  /// evaluates to false.
106  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
107                                  StringRef ParentName,
108                                  llvm::Function *&OutlinedFn,
109                                  llvm::Constant *&OutlinedFnID,
110                                  bool IsOffloadEntry,
111                                  const RegionCodeGenTy &CodeGen) override;
112
113  /// Emits code for parallel or serial call of the \a OutlinedFn with
114  /// variables captured in a record which address is stored in \a
115  /// CapturedStruct.
116  /// This call is for the Non-SPMD Execution Mode.
117  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
118  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
119  /// \param CapturedVars A pointer to the record with the references to
120  /// variables used in \a OutlinedFn function.
121  /// \param IfCond Condition in the associated 'if' clause, if it was
122  /// specified, nullptr otherwise.
123  void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
124                               llvm::Value *OutlinedFn,
125                               ArrayRef<llvm::Value *> CapturedVars,
126                               const Expr *IfCond);
127
128  /// Emits code for parallel or serial call of the \a OutlinedFn with
129  /// variables captured in a record which address is stored in \a
130  /// CapturedStruct.
131  /// This call is for a parallel directive within an SPMD target directive.
132  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
133  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
134  /// \param CapturedVars A pointer to the record with the references to
135  /// variables used in \a OutlinedFn function.
136  /// \param IfCond Condition in the associated 'if' clause, if it was
137  /// specified, nullptr otherwise.
138  ///
139  void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
140                            llvm::Function *OutlinedFn,
141                            ArrayRef<llvm::Value *> CapturedVars,
142                            const Expr *IfCond);
143
144protected:
145  /// Get the function name of an outlined region.
146  //  The name can be customized depending on the target.
147  //
148  StringRef getOutlinedHelperName() const override {
149    return "__omp_outlined__";
150  }
151
152  /// Check if the default location must be constant.
153  /// Constant for NVPTX for better optimization.
154  bool isDefaultLocationConstant() const override { return true; }
155
156public:
157  explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
158  void clear() override;
159
160  bool isTargetCodegen() const override { return true; };
161
162  /// Declare generalized virtual functions which need to be defined
163  /// by all specializations of OpenMPGPURuntime Targets like AMDGCN
164  /// and NVPTX.
165
166  /// Get the GPU warp size.
167  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
168
169  /// Get the id of the current thread on the GPU.
170  llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
171
172  /// Get the maximum number of threads in a block of the GPU.
173  llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
174
175  /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
176  /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
177  void emitProcBindClause(CodeGenFunction &CGF,
178                          llvm::omp::ProcBindKind ProcBind,
179                          SourceLocation Loc) override;
180
181  /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
182  /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
183  /// clause.
184  /// \param NumThreads An integer value of threads.
185  void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
186                            SourceLocation Loc) override;
187
188  /// This function ought to emit, in the general case, a call to
189  // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
190  // as these numbers are obtained through the PTX grid and block configuration.
191  /// \param NumTeams An integer expression of teams.
192  /// \param ThreadLimit An integer expression of threads.
193  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
194                          const Expr *ThreadLimit, SourceLocation Loc) override;
195
196  /// Emits inlined function for the specified OpenMP parallel
197  //  directive.
198  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
199  /// kmp_int32 BoundID, struct context_vars*).
200  /// \param D OpenMP directive.
201  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
202  /// \param InnermostKind Kind of innermost directive (for simple directives it
203  /// is a directive itself, for combined - its innermost directive).
204  /// \param CodeGen Code generation sequence for the \a D directive.
205  llvm::Function *
206  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
207                               const VarDecl *ThreadIDVar,
208                               OpenMPDirectiveKind InnermostKind,
209                               const RegionCodeGenTy &CodeGen) override;
210
211  /// Emits inlined function for the specified OpenMP teams
212  //  directive.
213  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
214  /// kmp_int32 BoundID, struct context_vars*).
215  /// \param D OpenMP directive.
216  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
217  /// \param InnermostKind Kind of innermost directive (for simple directives it
218  /// is a directive itself, for combined - its innermost directive).
219  /// \param CodeGen Code generation sequence for the \a D directive.
220  llvm::Function *
221  emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
222                            const VarDecl *ThreadIDVar,
223                            OpenMPDirectiveKind InnermostKind,
224                            const RegionCodeGenTy &CodeGen) override;
225
226  /// Emits code for teams call of the \a OutlinedFn with
227  /// variables captured in a record which address is stored in \a
228  /// CapturedStruct.
229  /// \param OutlinedFn Outlined function to be run by team masters. Type of
230  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
231  /// \param CapturedVars A pointer to the record with the references to
232  /// variables used in \a OutlinedFn function.
233  ///
234  void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
235                     SourceLocation Loc, llvm::Function *OutlinedFn,
236                     ArrayRef<llvm::Value *> CapturedVars) override;
237
238  /// Emits code for parallel or serial call of the \a OutlinedFn with
239  /// variables captured in a record which address is stored in \a
240  /// CapturedStruct.
241  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
242  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
243  /// \param CapturedVars A pointer to the record with the references to
244  /// variables used in \a OutlinedFn function.
245  /// \param IfCond Condition in the associated 'if' clause, if it was
246  /// specified, nullptr otherwise.
247  /// \param NumThreads The value corresponding to the num_threads clause, if
248  /// any,
249  ///                   or nullptr.
250  void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
251                        llvm::Function *OutlinedFn,
252                        ArrayRef<llvm::Value *> CapturedVars,
253                        const Expr *IfCond, llvm::Value *NumThreads) override;
254
255  /// Emit an implicit/explicit barrier for OpenMP threads.
256  /// \param Kind Directive for which this implicit barrier call must be
257  /// generated. Must be OMPD_barrier for explicit barrier generation.
258  /// \param EmitChecks true if need to emit checks for cancellation barriers.
259  /// \param ForceSimpleCall true simple barrier call must be emitted, false if
260  /// runtime class decides which one to emit (simple or with cancellation
261  /// checks).
262  ///
263  void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
264                       OpenMPDirectiveKind Kind, bool EmitChecks = true,
265                       bool ForceSimpleCall = false) override;
266
267  /// Emits a critical region.
268  /// \param CriticalName Name of the critical region.
269  /// \param CriticalOpGen Generator for the statement associated with the given
270  /// critical region.
271  /// \param Hint Value of the 'hint' clause (optional).
272  void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
273                          const RegionCodeGenTy &CriticalOpGen,
274                          SourceLocation Loc,
275                          const Expr *Hint = nullptr) override;
276
277  /// Emit a code for reduction clause.
278  ///
279  /// \param Privates List of private copies for original reduction arguments.
280  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
281  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
282  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
283  /// or 'operator binop(LHS, RHS)'.
284  /// \param Options List of options for reduction codegen:
285  ///     WithNowait true if parent directive has also nowait clause, false
286  ///     otherwise.
287  ///     SimpleReduction Emit reduction operation only. Used for omp simd
288  ///     directive on the host.
289  ///     ReductionKind The kind of reduction to perform.
290  void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
291                     ArrayRef<const Expr *> Privates,
292                     ArrayRef<const Expr *> LHSExprs,
293                     ArrayRef<const Expr *> RHSExprs,
294                     ArrayRef<const Expr *> ReductionOps,
295                     ReductionOptionsTy Options) override;
296
297  /// Returns specified OpenMP runtime function for the current OpenMP
298  /// implementation.  Specialized for the NVPTX device.
299  /// \param Function OpenMP runtime function.
300  /// \return Specified function.
301  llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
302
303  /// Translates the native parameter of outlined function if this is required
304  /// for target.
305  /// \param FD Field decl from captured record for the parameter.
306  /// \param NativeParam Parameter itself.
307  const VarDecl *translateParameter(const FieldDecl *FD,
308                                    const VarDecl *NativeParam) const override;
309
310  /// Gets the address of the native argument basing on the address of the
311  /// target-specific parameter.
312  /// \param NativeParam Parameter itself.
313  /// \param TargetParam Corresponding target-specific parameter.
314  Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
315                              const VarDecl *TargetParam) const override;
316
317  /// Emits call of the outlined function with the provided arguments,
318  /// translating these arguments to correct target-specific arguments.
319  void emitOutlinedFunctionCall(
320      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
321      ArrayRef<llvm::Value *> Args = std::nullopt) const override;
322
323  /// Emits OpenMP-specific function prolog.
324  /// Required for device constructs.
325  void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
326
327  /// Gets the OpenMP-specific address of the local variable.
328  Address getAddressOfLocalVariable(CodeGenFunction &CGF,
329                                    const VarDecl *VD) override;
330
331  /// Target codegen is specialized based on two data-sharing modes: CUDA, in
332  /// which the local variables are actually global threadlocal, and Generic, in
333  /// which the local variables are placed in global memory if they may escape
334  /// their declaration context.
335  enum DataSharingMode {
336    /// CUDA data sharing mode.
337    CUDA,
338    /// Generic data-sharing mode.
339    Generic,
340  };
341
342  /// Cleans up references to the objects in finished function.
343  ///
344  void functionFinished(CodeGenFunction &CGF) override;
345
346  /// Choose a default value for the dist_schedule clause.
347  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
348      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
349      llvm::Value *&Chunk) const override;
350
351  /// Choose a default value for the schedule clause.
352  void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
353      const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
354      const Expr *&ChunkExpr) const override;
355
356  /// Adjust some parameters for the target-based directives, like addresses of
357  /// the variables captured by reference in lambdas.
358  void adjustTargetSpecificDataForLambdas(
359      CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
360
361  /// Perform check on requires decl to ensure that target architecture
362  /// supports unified addressing
363  void processRequiresDirective(const OMPRequiresDecl *D) override;
364
365  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
366  /// the predefined allocator and translates it into the corresponding address
367  /// space.
368  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;
369
370private:
371  /// Track the execution mode when codegening directives within a target
372  /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
373  /// target region and used by containing directives such as 'parallel'
374  /// to emit optimized code.
375  ExecutionMode CurrentExecutionMode = EM_Unknown;
376
377  /// true if currently emitting code for target/teams/distribute region, false
378  /// - otherwise.
379  bool IsInTTDRegion = false;
380
381  /// Map between an outlined function and its wrapper.
382  llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
383
384  /// Emit function which wraps the outline parallel region
385  /// and controls the parameters which are passed to this function.
386  /// The wrapper ensures that the outlined function is called
387  /// with the correct arguments when data is shared.
388  llvm::Function *createParallelDataSharingWrapper(
389      llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
390
391  /// The data for the single globalized variable.
392  struct MappedVarData {
393    /// Corresponding field in the global record.
394    llvm::Value *GlobalizedVal = nullptr;
395    /// Corresponding address.
396    Address PrivateAddr = Address::invalid();
397  };
398  /// The map of local variables to their addresses in the global memory.
399  using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
400  /// Set of the parameters passed by value escaping OpenMP context.
401  using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
402  struct FunctionData {
403    DeclToAddrMapTy LocalVarData;
404    EscapedParamsTy EscapedParameters;
405    llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
406    llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4>
407        EscapedVariableLengthDeclsAddrs;
408    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
409  };
410  /// Maps the function to the list of the globalized variables with their
411  /// addresses.
412  llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
413  llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
414  /// List of the records with the list of fields for the reductions across the
415  /// teams. Used to build the intermediate buffer for the fast teams
416  /// reductions.
417  /// All the records are gathered into a union `union.type` is created.
418  llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
419  /// Pair of the Non-SPMD team and all reductions variables in this team
420  /// region.
421  std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
422      TeamAndReductions;
423};
424
425} // CodeGen namespace.
426} // clang namespace.
427
428#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEGPU_H
429