//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as the 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
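  /// For example (an illustrative sketch, not tied to a specific construct),
  /// in an outlined function of the form
  /// \code
  ///   void .omp_outlined.(kmp_int32 *gtid, kmp_int32 *btid, ...);
  /// \endcode
  /// the default implementation returns an LValue for '*gtid'.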
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
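        // The switch emitted below has roughly this shape (an illustrative
        // sketch):
        //   switch (*part_id) {
        //   case 0: goto .untied.jmp.0;   // first task part
        //   case 1: goto .untied.jmp.1;   // cases added by emitUntiedSwitch
        //   default: goto .untied.done.;  // exit through cleanups
        //   }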
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
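  // Inlined regions have no captured-context struct of their own, so the
  // accessors below delegate to the enclosing outlined region when present.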
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
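    // Chain a new inlined region info in front of the current
    // CapturedStmtInfo; the destructor below restores the previous state once
    // emission of the construct is done.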
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used if the device was not specified; the runtime should get
  /// it from environment variables, per the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes the ident structure that encodes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
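/// When a RegionCodeGenTy with an attached action is invoked, the action's
/// Exit() hook is pushed via this cleanup with NormalAndEHCleanup, so
/// post-processing runs even on exceptional exits from the region (see
/// RegionCodeGenTy::operator() below).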
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true if an initializer from a 'declare
/// reduction' construct must be used for element initialization.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' construct used for the reduction item,
/// if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
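  // The loop latch completed here looks roughly like this in IR (an
  // illustrative sketch):
  //   %dest.next = getelementptr %dest.cur, 1
  //   %done = icmp eq %dest.next, %dest.end
  //   br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body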
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
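  // Only unreferenced declarations are erased below; definitions and globals
  // that still have uses are kept.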
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
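  // For example, given a user-defined reduction like (a sketch of the source
  // form):
  //   #pragma omp declare reduction(mymin : int : omp_out = omp_in < omp_out ? omp_in : omp_out)
  // the combiner body is emitted here with omp_in and omp_out mapped to the
  // pointees of the two implicit pointer parameters declared above.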
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means that if we have variables to finalize, we split the
    // block at IP and use the new block (=BB) as the destination to build a
    // JumpDest (via getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object. The FiniCB will still be
    // needed, but only at the point where the OpenMPIRBuilder is asked to
    // construct a parallel (or similar) construct.
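
    // FiniCB: on finalization (e.g. cancellation), branch to the OpenMP
    // cancel destination, running any active cleanups on the way.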
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
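  // (For example, for '#pragma omp parallel for' the dyn_casts above match
  // OMPParallelForDirective, and HasCancel becomes true when the loop body
  // contains a matching '#pragma omp cancel for'.)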
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread-id calls,
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
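      // Reusing the parameter load is only safe if it cannot be reached from
      // a landing pad before it is defined; otherwise fall through and emit a
      // __kmpc_global_thread_num call instead (see the checks below).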
1483 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1484 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1485 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1486 !CGF.getLangOpts().CXXExceptions || 1487 CGF.Builder.GetInsertBlock() == TopBlock || 1488 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1489 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1490 TopBlock || 1491 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1492 CGF.Builder.GetInsertBlock()) { 1493 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1494 // If the value was loaded in the entry block, cache it and use it everywhere 1495 // in the function. 1496 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 Elem.second.ThreadID = ThreadID; 1499 } 1500 return ThreadID; 1501 } 1502 } 1503 } 1504 1505 // This is not an outlined function region - need to call kmp_int32 1506 // __kmpc_global_thread_num(ident_t *loc). 1507 // Generate the thread id value and cache it for use across the 1508 // function. 1509 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1510 if (!Elem.second.ServiceInsertPt) 1511 setLocThreadIdInsertPt(CGF); 1512 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1513 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1514 llvm::CallInst *Call = CGF.Builder.CreateCall( 1515 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1516 OMPRTL___kmpc_global_thread_num), 1517 emitUpdateLocation(CGF, Loc)); 1518 Call->setCallingConv(CGF.getRuntimeCC()); 1519 Elem.second.ThreadID = Call; 1520 return Call; 1521} 1522 1523void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1524 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1525 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1526 clearLocThreadIdInsertPt(CGF); 1527 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1528 } 1529 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1530 for (const auto *D : FunctionUDRMap[CGF.CurFn]) 1531 UDRMap.erase(D); 1532 FunctionUDRMap.erase(CGF.CurFn); 1533 } 1534 auto I = FunctionUDMMap.find(CGF.CurFn); 1535 if (I != FunctionUDMMap.end()) { 1536 for (const auto *D : I->second) 1537 UDMMap.erase(D); 1538 FunctionUDMMap.erase(I); 1539 } 1540 LastprivateConditionalToTypes.erase(CGF.CurFn); 1541 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1542} 1543 1544llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1545 return OMPBuilder.IdentPtr; 1546} 1547 1548llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1549 if (!Kmpc_MicroTy) { 1550 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1551 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1552 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1553 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1554 } 1555 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1556} 1557 1558llvm::FunctionCallee 1559CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1560 assert((IVSize == 32 || IVSize == 64) && 1561 "IV size is not compatible with the omp runtime"); 1562 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1563 : "__kmpc_for_static_init_4u") 1564 : (IVSigned ? "__kmpc_for_static_init_8" 1565 : "__kmpc_for_static_init_8u"); 1566 llvm::Type *ITy = IVSize == 32 ?
CGM.Int32Ty : CGM.Int64Ty; 1567 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1568 llvm::Type *TypeParams[] = { 1569 getIdentTyPointerTy(), // loc 1570 CGM.Int32Ty, // tid 1571 CGM.Int32Ty, // schedtype 1572 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1573 PtrTy, // p_lower 1574 PtrTy, // p_upper 1575 PtrTy, // p_stride 1576 ITy, // incr 1577 ITy // chunk 1578 }; 1579 auto *FnTy = 1580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1581 return CGM.CreateRuntimeFunction(FnTy, Name); 1582} 1583 1584llvm::FunctionCallee 1585CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1586 assert((IVSize == 32 || IVSize == 64) && 1587 "IV size is not compatible with the omp runtime"); 1588 StringRef Name = 1589 IVSize == 32 1590 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1591 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1592 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1593 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1594 CGM.Int32Ty, // tid 1595 CGM.Int32Ty, // schedtype 1596 ITy, // lower 1597 ITy, // upper 1598 ITy, // stride 1599 ITy // chunk 1600 }; 1601 auto *FnTy = 1602 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1603 return CGM.CreateRuntimeFunction(FnTy, Name); 1604} 1605 1606llvm::FunctionCallee 1607CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1608 assert((IVSize == 32 || IVSize == 64) && 1609 "IV size is not compatible with the omp runtime"); 1610 StringRef Name = 1611 IVSize == 32 1612 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1613 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1614 llvm::Type *TypeParams[] = { 1615 getIdentTyPointerTy(), // loc 1616 CGM.Int32Ty, // tid 1617 }; 1618 auto *FnTy = 1619 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1620 return CGM.CreateRuntimeFunction(FnTy, Name); 1621} 1622 1623llvm::FunctionCallee 1624CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1625 assert((IVSize == 32 || IVSize == 64) && 1626 "IV size is not compatible with the omp runtime"); 1627 StringRef Name = 1628 IVSize == 32 1629 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1630 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1631 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1632 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1633 llvm::Type *TypeParams[] = { 1634 getIdentTyPointerTy(), // loc 1635 CGM.Int32Ty, // tid 1636 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1637 PtrTy, // p_lower 1638 PtrTy, // p_upper 1639 PtrTy // p_stride 1640 }; 1641 auto *FnTy = 1642 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1643 return CGM.CreateRuntimeFunction(FnTy, Name); 1644} 1645 1646/// Obtain information that uniquely identifies a target entry. This 1647/// consists of the file and device IDs as well as line number associated with 1648/// the relevant entry source location. 
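/// For example, a construct at line 42 of a file whose llvm::sys::fs::UniqueID
/// is (0x800, 0x2a4be1) yields DeviceID=0x800, FileID=0x2a4be1 and LineNum=42
/// (values illustrative).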
1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1650 unsigned &DeviceID, unsigned &FileID, 1651 unsigned &LineNum) { 1652 SourceManager &SM = C.getSourceManager(); 1653 1654 // The loc should always be valid and have a file ID (the user cannot use 1655 // #pragma directives in macros). 1656 1657 assert(Loc.isValid() && "Source location is expected to be always valid."); 1658 1659 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1660 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1661 1662 llvm::sys::fs::UniqueID ID; 1663 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { 1664 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); 1665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1666 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1667 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1668 << PLoc.getFilename() << EC.message(); 1669 } 1670 1671 DeviceID = ID.getDevice(); 1672 FileID = ID.getFile(); 1673 LineNum = PLoc.getLine(); 1674} 1675 1676Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1677 if (CGM.getLangOpts().OpenMPSimd) 1678 return Address::invalid(); 1679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1680 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1681 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1682 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1683 HasRequiresUnifiedSharedMemory))) { 1684 SmallString<64> PtrName; 1685 { 1686 llvm::raw_svector_ostream OS(PtrName); 1687 OS << CGM.getMangledName(GlobalDecl(VD)); 1688 if (!VD->isExternallyVisible()) { 1689 unsigned DeviceID, FileID, Line; 1690 getTargetEntryUniqueInfo(CGM.getContext(), 1691 VD->getCanonicalDecl()->getBeginLoc(), 1692 DeviceID, FileID, Line); 1693 OS << llvm::format("_%x", FileID); 1694 } 1695 OS << "_decl_tgt_ref_ptr"; 1696 } 1697 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1698 if (!Ptr) { 1699 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1700 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1701 PtrName); 1702 1703 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1704 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1705 1706 if (!CGM.getLangOpts().OpenMPIsDevice) 1707 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1708 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1709 } 1710 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1711 } 1712 return Address::invalid(); 1713} 1714 1715llvm::Constant * 1716CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1717 assert(!CGM.getLangOpts().OpenMPUseTLS || 1718 !CGM.getContext().getTargetInfo().isTLSSupported()); 1719 // Look up the entry, lazily creating it if necessary.
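  // The cache is an i8** internal global named after the variable, roughly
  // "<mangled-name>.cache." for the host runtime (the exact separators come
  // from getName()); __kmpc_threadprivate_cached fills it on first use.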
1720 std::string Suffix = getName({"cache", ""}); 1721 return getOrCreateInternalVariable( 1722 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1723} 1724 1725Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1726 const VarDecl *VD, 1727 Address VDAddr, 1728 SourceLocation Loc) { 1729 if (CGM.getLangOpts().OpenMPUseTLS && 1730 CGM.getContext().getTargetInfo().isTLSSupported()) 1731 return VDAddr; 1732 1733 llvm::Type *VarTy = VDAddr.getElementType(); 1734 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1735 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1736 CGM.Int8PtrTy), 1737 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1738 getOrCreateThreadPrivateCache(VD)}; 1739 return Address(CGF.EmitRuntimeCall( 1740 OMPBuilder.getOrCreateRuntimeFunction( 1741 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1742 Args), 1743 VDAddr.getAlignment()); 1744} 1745 1746void CGOpenMPRuntime::emitThreadPrivateVarInit( 1747 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1748 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1749 // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP 1750 // runtime library. 1751 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1752 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1753 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1754 OMPLoc); 1755 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1756 // to register the constructor/destructor for the variable. 1757 llvm::Value *Args[] = { 1758 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1759 Ctor, CopyCtor, Dtor}; 1760 CGF.EmitRuntimeCall( 1761 OMPBuilder.getOrCreateRuntimeFunction( 1762 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1763 Args); 1764} 1765 1766llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1767 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1768 bool PerformInit, CodeGenFunction *CGF) { 1769 if (CGM.getLangOpts().OpenMPUseTLS && 1770 CGM.getContext().getTargetInfo().isTLSSupported()) 1771 return nullptr; 1772 1773 VD = VD->getDefinition(CGM.getContext()); 1774 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1775 QualType ASTTy = VD->getType(); 1776 1777 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1778 const Expr *Init = VD->getAnyInitializer(); 1779 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1780 // Generate a function that re-emits the declaration's initializer into 1781 // the threadprivate copy of the variable VD. 1782 CodeGenFunction CtorCGF(CGM); 1783 FunctionArgList Args; 1784 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1785 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1786 ImplicitParamDecl::Other); 1787 Args.push_back(&Dst); 1788 1789 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1790 CGM.getContext().VoidPtrTy, Args); 1791 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1792 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1793 llvm::Function *Fn = 1794 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1795 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1796 Args, Loc, Loc); 1797 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1798 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1799 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1800 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1801 Arg =
CtorCGF.Builder.CreateElementBitCast( 1802 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1803 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1804 /*IsInitializer=*/true); 1805 ArgVal = CtorCGF.EmitLoadOfScalar( 1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1807 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1808 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1809 CtorCGF.FinishFunction(); 1810 Ctor = Fn; 1811 } 1812 if (VD->getType().isDestructedType() != QualType::DK_none) { 1813 // Generate a function that emits the destructor call for the threadprivate 1814 // copy of the variable VD. 1815 CodeGenFunction DtorCGF(CGM); 1816 FunctionArgList Args; 1817 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1818 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1819 ImplicitParamDecl::Other); 1820 Args.push_back(&Dst); 1821 1822 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1823 CGM.getContext().VoidTy, Args); 1824 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1825 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1826 llvm::Function *Fn = 1827 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1828 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1829 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1830 Loc, Loc); 1831 // Create a scope with an artificial location for the body of this function. 1832 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1833 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1834 DtorCGF.GetAddrOfLocalVar(&Dst), 1835 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1836 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1837 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1838 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1839 DtorCGF.FinishFunction(); 1840 Dtor = Fn; 1841 } 1842 // Do not emit an init function if it is not required. 1843 if (!Ctor && !Dtor) 1844 return nullptr; 1845 1846 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1847 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1848 /*isVarArg=*/false) 1849 ->getPointerTo(); 1850 // Copying constructor for the threadprivate variable. 1851 // Must be NULL - reserved by the runtime, which currently requires that 1852 // this parameter always be NULL; otherwise it fires an assertion.
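    // (The reserved copy-constructor slot has the function pointer type
    // void *(*)(void *, void *) built above; a typed null constant is passed
    // for it, and likewise for any missing ctor/dtor below.)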
1853 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1854 if (Ctor == nullptr) { 1855 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1856 /*isVarArg=*/false) 1857 ->getPointerTo(); 1858 Ctor = llvm::Constant::getNullValue(CtorTy); 1859 } 1860 if (Dtor == nullptr) { 1861 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Dtor = llvm::Constant::getNullValue(DtorTy); 1865 } 1866 if (!CGF) { 1867 auto *InitFunctionTy = 1868 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1869 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1870 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1871 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1872 CodeGenFunction InitCGF(CGM); 1873 FunctionArgList ArgList; 1874 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1875 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1876 Loc, Loc); 1877 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1878 InitCGF.FinishFunction(); 1879 return InitFunction; 1880 } 1881 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1882 } 1883 return nullptr; 1884} 1885 1886bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1887 llvm::GlobalVariable *Addr, 1888 bool PerformInit) { 1889 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1890 !CGM.getLangOpts().OpenMPIsDevice) 1891 return false; 1892 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1893 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1894 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1895 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1896 HasRequiresUnifiedSharedMemory)) 1897 return CGM.getLangOpts().OpenMPIsDevice; 1898 VD = VD->getDefinition(CGM.getContext()); 1899 assert(VD && "Unknown VarDecl"); 1900 1901 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1902 return CGM.getLangOpts().OpenMPIsDevice; 1903 1904 QualType ASTTy = VD->getType(); 1905 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1906 1907 // Produce the unique prefix to identify the new target regions. We use 1908 // the source location of the variable declaration which we know to not 1909 // conflict with any target region. 
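  // The prefix below has the form "__omp_offloading_<device>_<file>_<name>_l<line>"
  // with the IDs rendered in hex, e.g. "__omp_offloading_800_2a4be1_gbl_l42"
  // (values illustrative); "_ctor"/"_dtor" is appended for the init/cleanup helpers.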
1910 unsigned DeviceID; 1911 unsigned FileID; 1912 unsigned Line; 1913 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1914 SmallString<128> Buffer, Out; 1915 { 1916 llvm::raw_svector_ostream OS(Buffer); 1917 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1918 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1919 } 1920 1921 const Expr *Init = VD->getAnyInitializer(); 1922 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1923 llvm::Constant *Ctor; 1924 llvm::Constant *ID; 1925 if (CGM.getLangOpts().OpenMPIsDevice) { 1926 // Generate function that re-emits the declaration's initializer into 1927 // the threadprivate copy of the variable VD 1928 CodeGenFunction CtorCGF(CGM); 1929 1930 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1931 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1932 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1933 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1934 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1935 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1936 FunctionArgList(), Loc, Loc); 1937 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1938 CtorCGF.EmitAnyExprToMem(Init, 1939 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1940 Init->getType().getQualifiers(), 1941 /*IsInitializer=*/true); 1942 CtorCGF.FinishFunction(); 1943 Ctor = Fn; 1944 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1945 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1946 } else { 1947 Ctor = new llvm::GlobalVariable( 1948 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1949 llvm::GlobalValue::PrivateLinkage, 1950 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1951 ID = Ctor; 1952 } 1953 1954 // Register the information for the entry associated with the constructor. 1955 Out.clear(); 1956 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1957 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1958 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1959 } 1960 if (VD->getType().isDestructedType() != QualType::DK_none) { 1961 llvm::Constant *Dtor; 1962 llvm::Constant *ID; 1963 if (CGM.getLangOpts().OpenMPIsDevice) { 1964 // Generate function that emits destructor call for the threadprivate 1965 // copy of the variable VD 1966 CodeGenFunction DtorCGF(CGM); 1967 1968 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1969 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1970 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1971 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1972 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1973 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1974 FunctionArgList(), Loc, Loc); 1975 // Create a scope with an artificial location for the body of this 1976 // function. 
1977 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1978 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1979 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1980 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1981 DtorCGF.FinishFunction(); 1982 Dtor = Fn; 1983 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1984 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1985 } else { 1986 Dtor = new llvm::GlobalVariable( 1987 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1988 llvm::GlobalValue::PrivateLinkage, 1989 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1990 ID = Dtor; 1991 } 1992 // Register the information for the entry associated with the destructor. 1993 Out.clear(); 1994 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1995 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1996 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1997 } 1998 return CGM.getLangOpts().OpenMPIsDevice; 1999} 2000 2001Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2002 QualType VarType, 2003 StringRef Name) { 2004 std::string Suffix = getName({"artificial", ""}); 2005 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2006 llvm::Value *GAddr = 2007 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2008 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2009 CGM.getTarget().isTLSSupported()) { 2010 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 2011 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 2012 } 2013 std::string CacheSuffix = getName({"cache", ""}); 2014 llvm::Value *Args[] = { 2015 emitUpdateLocation(CGF, SourceLocation()), 2016 getThreadID(CGF, SourceLocation()), 2017 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2018 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2019 /*isSigned=*/false), 2020 getOrCreateInternalVariable( 2021 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2022 return Address( 2023 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2024 CGF.EmitRuntimeCall( 2025 OMPBuilder.getOrCreateRuntimeFunction( 2026 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2027 Args), 2028 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2029 CGM.getContext().getTypeAlignInChars(VarType)); 2030} 2031 2032void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2033 const RegionCodeGenTy &ThenGen, 2034 const RegionCodeGenTy &ElseGen) { 2035 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2036 2037 // If the condition constant folds and can be elided, try to avoid emitting 2038 // the condition and the dead arm of the if/else. 2039 bool CondConstant; 2040 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2041 if (CondConstant) 2042 ThenGen(CGF); 2043 else 2044 ElseGen(CGF); 2045 return; 2046 } 2047 2048 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2049 // emit the conditional branch. 2050 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2051 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2052 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2053 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2054 2055 // Emit the 'then' code. 2056 CGF.EmitBlock(ThenBlock); 2057 ThenGen(CGF); 2058 CGF.EmitBranch(ContBlock); 2059 // Emit the 'else' code if present. 
2060 // There is no need to emit line number for unconditional branch. 2061 (void)ApplyDebugLocation::CreateEmpty(CGF); 2062 CGF.EmitBlock(ElseBlock); 2063 ElseGen(CGF); 2064 // There is no need to emit line number for unconditional branch. 2065 (void)ApplyDebugLocation::CreateEmpty(CGF); 2066 CGF.EmitBranch(ContBlock); 2067 // Emit the continuation block for code after the if. 2068 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2069} 2070 2071void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2072 llvm::Function *OutlinedFn, 2073 ArrayRef<llvm::Value *> CapturedVars, 2074 const Expr *IfCond) { 2075 if (!CGF.HaveInsertPoint()) 2076 return; 2077 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2078 auto &M = CGM.getModule(); 2079 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2082 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2083 llvm::Value *Args[] = { 2084 RTLoc, 2085 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2086 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2087 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2088 RealArgs.append(std::begin(Args), std::end(Args)); 2089 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2090 2091 llvm::FunctionCallee RTLFn = 2092 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2093 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2094 }; 2095 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2096 this](CodeGenFunction &CGF, PrePostActionTy &) { 2097 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2098 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2099 // Build calls: 2100 // __kmpc_serialized_parallel(&Loc, GTid); 2101 llvm::Value *Args[] = {RTLoc, ThreadID}; 2102 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2103 M, OMPRTL___kmpc_serialized_parallel), 2104 Args); 2105 2106 // OutlinedFn(&GTid, &zero_bound, CapturedStruct); 2107 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2108 Address ZeroAddrBound = 2109 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2110 /*Name=*/".bound.zero.addr"); 2111 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2112 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2113 // ThreadId for serialized parallels is 0. 2114 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2115 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2116 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2117 2118 // Ensure we do not inline the function. This is trivially true for the ones 2119 // passed to __kmpc_fork_call, but the ones called in serialized regions 2120 // could be inlined. This is not perfect, but it is closer to the invariant 2121 // we want, namely, that every data environment starts with a new function. 2122 // TODO: We should pass the if condition to the runtime function and do the 2123 // handling there. That would be much cleaner code.
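    // Schematically, this serialized ("else") path emits:
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   OutlinedFn(&gtid, &.bound.zero.addr, <captured vars>);
    //   __kmpc_end_serialized_parallel(&loc, gtid);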
2124 OutlinedFn->addFnAttr(llvm::Attribute::NoInline); 2125 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2126 2127 // __kmpc_end_serialized_parallel(&Loc, GTid); 2128 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2129 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2130 M, OMPRTL___kmpc_end_serialized_parallel), 2131 EndArgs); 2132 }; 2133 if (IfCond) { 2134 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2135 } else { 2136 RegionCodeGenTy ThenRCG(ThenGen); 2137 ThenRCG(CGF); 2138 } 2139} 2140 2141// If we're inside an (outlined) parallel region, use the region info's 2142// thread-ID variable (it is passed as the first argument of the outlined 2143// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel 2144// region but in a regular serial code region, get the thread ID by calling 2145// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary 2146// and return the address of that temp. 2147Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2148 SourceLocation Loc) { 2149 if (auto *OMPRegionInfo = 2150 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2151 if (OMPRegionInfo->getThreadIDVariable()) 2152 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2153 2154 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2155 QualType Int32Ty = 2156 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2157 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2158 CGF.EmitStoreOfScalar(ThreadID, 2159 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2160 2161 return ThreadIDTemp; 2162} 2163 2164llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2165 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2166 SmallString<256> Buffer; 2167 llvm::raw_svector_ostream Out(Buffer); 2168 Out << Name; 2169 StringRef RuntimeName = Out.str(); 2170 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2171 if (Elem.second) { 2172 assert(Elem.second->getType()->getPointerElementType() == Ty && 2173 "OMP internal variable has different type than requested"); 2174 return &*Elem.second; 2175 } 2176 2177 return Elem.second = new llvm::GlobalVariable( 2178 CGM.getModule(), Ty, /*IsConstant*/ false, 2179 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2180 Elem.first(), /*InsertBefore=*/nullptr, 2181 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2182} 2183 2184llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2185 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2186 std::string Name = getName({Prefix, "var"}); 2187 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2188} 2189 2190namespace { 2191/// Common pre(post)-action for different OpenMP constructs.
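/// Emits the Enter runtime call when the region is entered and the Exit call
/// when it is left. With Conditional set, the region is guarded by the result
/// of the Enter call, roughly:
///   if (EnterCallee(EnterArgs)) { <region>; ExitCallee(ExitArgs); }
/// as used for the master/masked/single regions below.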
2192class CommonActionTy final : public PrePostActionTy { 2193 llvm::FunctionCallee EnterCallee; 2194 ArrayRef<llvm::Value *> EnterArgs; 2195 llvm::FunctionCallee ExitCallee; 2196 ArrayRef<llvm::Value *> ExitArgs; 2197 bool Conditional; 2198 llvm::BasicBlock *ContBlock = nullptr; 2199 2200public: 2201 CommonActionTy(llvm::FunctionCallee EnterCallee, 2202 ArrayRef<llvm::Value *> EnterArgs, 2203 llvm::FunctionCallee ExitCallee, 2204 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2205 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2206 ExitArgs(ExitArgs), Conditional(Conditional) {} 2207 void Enter(CodeGenFunction &CGF) override { 2208 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2209 if (Conditional) { 2210 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2211 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2212 ContBlock = CGF.createBasicBlock("omp_if.end"); 2213 // Generate the branch (If-stmt) 2214 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2215 CGF.EmitBlock(ThenBlock); 2216 } 2217 } 2218 void Done(CodeGenFunction &CGF) { 2219 // Emit the rest of blocks/branches 2220 CGF.EmitBranch(ContBlock); 2221 CGF.EmitBlock(ContBlock, true); 2222 } 2223 void Exit(CodeGenFunction &CGF) override { 2224 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2225 } 2226}; 2227} // anonymous namespace 2228 2229void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2230 StringRef CriticalName, 2231 const RegionCodeGenTy &CriticalOpGen, 2232 SourceLocation Loc, const Expr *Hint) { 2233 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2234 // CriticalOpGen(); 2235 // __kmpc_end_critical(ident_t *, gtid, Lock); 2236 // Prepare arguments and build a call to __kmpc_critical 2237 if (!CGF.HaveInsertPoint()) 2238 return; 2239 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2240 getCriticalRegionLock(CriticalName)}; 2241 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2242 std::end(Args)); 2243 if (Hint) { 2244 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2245 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2246 } 2247 CommonActionTy Action( 2248 OMPBuilder.getOrCreateRuntimeFunction( 2249 CGM.getModule(), 2250 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), 2251 EnterArgs, 2252 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2253 OMPRTL___kmpc_end_critical), 2254 Args); 2255 CriticalOpGen.setAction(Action); 2256 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 2257} 2258 2259void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 2260 const RegionCodeGenTy &MasterOpGen, 2261 SourceLocation Loc) { 2262 if (!CGF.HaveInsertPoint()) 2263 return; 2264 // if(__kmpc_master(ident_t *, gtid)) { 2265 // MasterOpGen(); 2266 // __kmpc_end_master(ident_t *, gtid); 2267 // } 2268 // Prepare arguments and build a call to __kmpc_master 2269 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2270 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2271 CGM.getModule(), OMPRTL___kmpc_master), 2272 Args, 2273 OMPBuilder.getOrCreateRuntimeFunction( 2274 CGM.getModule(), OMPRTL___kmpc_end_master), 2275 Args, 2276 /*Conditional=*/true); 2277 MasterOpGen.setAction(Action); 2278 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 2279 Action.Done(CGF); 2280} 2281 2282void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, 2283 const RegionCodeGenTy &MaskedOpGen, 2284 SourceLocation Loc, const Expr *Filter) { 2285 if (!CGF.HaveInsertPoint()) 2286 return; 2287 // if(__kmpc_masked(ident_t *, gtid, filter)) { 2288 // MaskedOpGen(); 2289 // __kmpc_end_masked(ident_t *, gtid); 2290 // } 2291 // Prepare arguments and build a call to __kmpc_masked 2292 llvm::Value *FilterVal = Filter 2293 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) 2294 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); 2295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2296 FilterVal}; 2297 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), 2298 getThreadID(CGF, Loc)}; 2299 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2300 CGM.getModule(), OMPRTL___kmpc_masked), 2301 Args, 2302 OMPBuilder.getOrCreateRuntimeFunction( 2303 CGM.getModule(), OMPRTL___kmpc_end_masked), 2304 ArgsEnd, 2305 /*Conditional=*/true); 2306 MaskedOpGen.setAction(Action); 2307 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); 2308 Action.Done(CGF); 2309} 2310 2311void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 2312 SourceLocation Loc) { 2313 if (!CGF.HaveInsertPoint()) 2314 return; 2315 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2316 OMPBuilder.createTaskyield(CGF.Builder); 2317 } else { 2318 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 2319 llvm::Value *Args[] = { 2320 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2321 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 2322 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2323 CGM.getModule(), OMPRTL___kmpc_omp_taskyield), 2324 Args); 2325 } 2326 2327 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2328 Region->emitUntiedSwitch(CGF); 2329} 2330 2331void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 2332 const RegionCodeGenTy &TaskgroupOpGen, 2333 SourceLocation Loc) { 2334 if (!CGF.HaveInsertPoint()) 2335 return; 2336 // __kmpc_taskgroup(ident_t *, gtid); 2337 // TaskgroupOpGen(); 2338 // __kmpc_end_taskgroup(ident_t *, gtid); 2339 // Prepare arguments and build a call to __kmpc_taskgroup 2340 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2341 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2342 CGM.getModule(), OMPRTL___kmpc_taskgroup), 2343 Args, 2344
OMPBuilder.getOrCreateRuntimeFunction( 2345 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2346 Args); 2347 TaskgroupOpGen.setAction(Action); 2348 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2349} 2350 2351/// Given an array of pointers to variables, project the address of a 2352/// given variable. 2353static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2354 unsigned Index, const VarDecl *Var) { 2355 // Pull out the pointer to the variable. 2356 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2357 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2358 2359 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2360 Addr = CGF.Builder.CreateElementBitCast( 2361 Addr, CGF.ConvertTypeForMem(Var->getType())); 2362 return Addr; 2363} 2364 2365static llvm::Value *emitCopyprivateCopyFunction( 2366 CodeGenModule &CGM, llvm::Type *ArgsType, 2367 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2368 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2369 SourceLocation Loc) { 2370 ASTContext &C = CGM.getContext(); 2371 // void copy_func(void *LHSArg, void *RHSArg); 2372 FunctionArgList Args; 2373 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2374 ImplicitParamDecl::Other); 2375 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2376 ImplicitParamDecl::Other); 2377 Args.push_back(&LHSArg); 2378 Args.push_back(&RHSArg); 2379 const auto &CGFI = 2380 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2381 std::string Name = 2382 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2383 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2384 llvm::GlobalValue::InternalLinkage, Name, 2385 &CGM.getModule()); 2386 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2387 Fn->setDoesNotRecurse(); 2388 CodeGenFunction CGF(CGM); 2389 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2390 // Dest = (void*[n])(LHSArg); 2391 // Src = (void*[n])(RHSArg); 2392 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2393 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2394 ArgsType), CGF.getPointerAlign()); 2395 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2396 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2397 ArgsType), CGF.getPointerAlign()); 2398 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2399 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2400 // ... 
2401 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2402 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2403 const auto *DestVar = 2404 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2405 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2406 2407 const auto *SrcVar = 2408 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2409 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2410 2411 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2412 QualType Type = VD->getType(); 2413 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2414 } 2415 CGF.FinishFunction(); 2416 return Fn; 2417} 2418 2419void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2420 const RegionCodeGenTy &SingleOpGen, 2421 SourceLocation Loc, 2422 ArrayRef<const Expr *> CopyprivateVars, 2423 ArrayRef<const Expr *> SrcExprs, 2424 ArrayRef<const Expr *> DstExprs, 2425 ArrayRef<const Expr *> AssignmentOps) { 2426 if (!CGF.HaveInsertPoint()) 2427 return; 2428 assert(CopyprivateVars.size() == SrcExprs.size() && 2429 CopyprivateVars.size() == DstExprs.size() && 2430 CopyprivateVars.size() == AssignmentOps.size()); 2431 ASTContext &C = CGM.getContext(); 2432 // int32 did_it = 0; 2433 // if(__kmpc_single(ident_t *, gtid)) { 2434 // SingleOpGen(); 2435 // __kmpc_end_single(ident_t *, gtid); 2436 // did_it = 1; 2437 // } 2438 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2439 // <copy_func>, did_it); 2440 2441 Address DidIt = Address::invalid(); 2442 if (!CopyprivateVars.empty()) { 2443 // int32 did_it = 0; 2444 QualType KmpInt32Ty = 2445 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2446 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2447 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2448 } 2449 // Prepare arguments and build a call to __kmpc_single 2450 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2451 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2452 CGM.getModule(), OMPRTL___kmpc_single), 2453 Args, 2454 OMPBuilder.getOrCreateRuntimeFunction( 2455 CGM.getModule(), OMPRTL___kmpc_end_single), 2456 Args, 2457 /*Conditional=*/true); 2458 SingleOpGen.setAction(Action); 2459 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2460 if (DidIt.isValid()) { 2461 // did_it = 1; 2462 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2463 } 2464 Action.Done(CGF); 2465 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2466 // <copy_func>, did_it); 2467 if (DidIt.isValid()) { 2468 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2469 QualType CopyprivateArrayTy = C.getConstantArrayType( 2470 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2471 /*IndexTypeQuals=*/0); 2472 // Create a list of all private variables for copyprivate. 2473 Address CopyprivateList = 2474 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2475 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2476 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2477 CGF.Builder.CreateStore( 2478 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2479 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2480 CGF.VoidPtrTy), 2481 Elem); 2482 } 2483 // Build function that copies private values from single region to all other 2484 // threads in the corresponding parallel region. 
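    // Sketch of the generated helper (name and element types illustrative):
    //   void .omp.copyprivate.copy_func(void *LHS, void *RHS) {
    //     *(T0 *)((void **)LHS)[0] = *(T0 *)((void **)RHS)[0];
    //     ...
    //   }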
2485 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2486 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2487 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2488 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2489 Address CL = 2490 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2491 CGF.VoidPtrTy); 2492 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2493 llvm::Value *Args[] = { 2494 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2495 getThreadID(CGF, Loc), // i32 <gtid> 2496 BufSize, // size_t <buf_size> 2497 CL.getPointer(), // void *<copyprivate list> 2498 CpyFn, // void (*) (void *, void *) <copy_func> 2499 DidItVal // i32 did_it 2500 }; 2501 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2502 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2503 Args); 2504 } 2505} 2506 2507void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2508 const RegionCodeGenTy &OrderedOpGen, 2509 SourceLocation Loc, bool IsThreads) { 2510 if (!CGF.HaveInsertPoint()) 2511 return; 2512 // __kmpc_ordered(ident_t *, gtid); 2513 // OrderedOpGen(); 2514 // __kmpc_end_ordered(ident_t *, gtid); 2515 // Prepare arguments and build a call to __kmpc_ordered 2516 if (IsThreads) { 2517 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2518 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2519 CGM.getModule(), OMPRTL___kmpc_ordered), 2520 Args, 2521 OMPBuilder.getOrCreateRuntimeFunction( 2522 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2523 Args); 2524 OrderedOpGen.setAction(Action); 2525 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2526 return; 2527 } 2528 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2529} 2530 2531unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2532 unsigned Flags; 2533 if (Kind == OMPD_for) 2534 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2535 else if (Kind == OMPD_sections) 2536 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2537 else if (Kind == OMPD_single) 2538 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2539 else if (Kind == OMPD_barrier) 2540 Flags = OMP_IDENT_BARRIER_EXPL; 2541 else 2542 Flags = OMP_IDENT_BARRIER_IMPL; 2543 return Flags; 2544} 2545 2546void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2547 CodeGenFunction &CGF, const OMPLoopDirective &S, 2548 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2549 // Check if the loop directive is actually a doacross loop directive. In this 2550 // case choose static, 1 schedule. 2551 if (llvm::any_of( 2552 S.getClausesOfKind<OMPOrderedClause>(), 2553 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2554 ScheduleKind = OMPC_SCHEDULE_static; 2555 // Chunk size is 1 in this case. 
2556 llvm::APInt ChunkSize(32, 1); 2557 ChunkExpr = IntegerLiteral::Create( 2558 CGF.getContext(), ChunkSize, 2559 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 2560 SourceLocation()); 2561 } 2562} 2563 2564void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 2565 OpenMPDirectiveKind Kind, bool EmitChecks, 2566 bool ForceSimpleCall) { 2567 // Check if we should use the OMPBuilder 2568 auto *OMPRegionInfo = 2569 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 2570 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2571 CGF.Builder.restoreIP(OMPBuilder.createBarrier( 2572 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 2573 return; 2574 } 2575 2576 if (!CGF.HaveInsertPoint()) 2577 return; 2578 // Build call __kmpc_cancel_barrier(loc, thread_id); 2579 // Build call __kmpc_barrier(loc, thread_id); 2580 unsigned Flags = getDefaultFlagsForBarriers(Kind); 2581 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 2582 // thread_id); 2583 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 2584 getThreadID(CGF, Loc)}; 2585 if (OMPRegionInfo) { 2586 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 2587 llvm::Value *Result = CGF.EmitRuntimeCall( 2588 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2589 OMPRTL___kmpc_cancel_barrier), 2590 Args); 2591 if (EmitChecks) { 2592 // if (__kmpc_cancel_barrier()) { 2593 // exit from construct; 2594 // } 2595 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2596 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2597 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2598 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2599 CGF.EmitBlock(ExitBB); 2600 // exit from construct; 2601 CodeGenFunction::JumpDest CancelDestination = 2602 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2603 CGF.EmitBranchThroughCleanup(CancelDestination); 2604 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2605 } 2606 return; 2607 } 2608 } 2609 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2610 CGM.getModule(), OMPRTL___kmpc_barrier), 2611 Args); 2612} 2613 2614/// Map the OpenMP loop schedule to the runtime enumeration. 2615static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2616 bool Chunked, bool Ordered) { 2617 switch (ScheduleKind) { 2618 case OMPC_SCHEDULE_static: 2619 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2620 : (Ordered ? OMP_ord_static : OMP_sch_static); 2621 case OMPC_SCHEDULE_dynamic: 2622 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2623 case OMPC_SCHEDULE_guided: 2624 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2625 case OMPC_SCHEDULE_runtime: 2626 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2627 case OMPC_SCHEDULE_auto: 2628 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2629 case OMPC_SCHEDULE_unknown: 2630 assert(!Chunked && "chunk was specified but schedule kind not known"); 2631 return Ordered ? OMP_ord_static : OMP_sch_static; 2632 } 2633 llvm_unreachable("Unexpected runtime schedule"); 2634} 2635 2636/// Map the OpenMP distribute schedule to the runtime enumeration. 2637static OpenMPSchedType 2638getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2639 // only static is allowed for dist_schedule 2640 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2641} 2642 2643bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2644 bool Chunked) const { 2645 OpenMPSchedType Schedule = 2646 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2647 return Schedule == OMP_sch_static; 2648} 2649 2650bool CGOpenMPRuntime::isStaticNonchunked( 2651 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2652 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2653 return Schedule == OMP_dist_sch_static; 2654} 2655 2656bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2657 bool Chunked) const { 2658 OpenMPSchedType Schedule = 2659 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2660 return Schedule == OMP_sch_static_chunked; 2661} 2662 2663bool CGOpenMPRuntime::isStaticChunked( 2664 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2665 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2666 return Schedule == OMP_dist_sch_static_chunked; 2667} 2668 2669bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2670 OpenMPSchedType Schedule = 2671 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2672 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2673 return Schedule != OMP_sch_static; 2674} 2675 2676static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2677 OpenMPScheduleClauseModifier M1, 2678 OpenMPScheduleClauseModifier M2) { 2679 int Modifier = 0; 2680 switch (M1) { 2681 case OMPC_SCHEDULE_MODIFIER_monotonic: 2682 Modifier = OMP_sch_modifier_monotonic; 2683 break; 2684 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2685 Modifier = OMP_sch_modifier_nonmonotonic; 2686 break; 2687 case OMPC_SCHEDULE_MODIFIER_simd: 2688 if (Schedule == OMP_sch_static_chunked) 2689 Schedule = OMP_sch_static_balanced_chunked; 2690 break; 2691 case OMPC_SCHEDULE_MODIFIER_last: 2692 case OMPC_SCHEDULE_MODIFIER_unknown: 2693 break; 2694 } 2695 switch (M2) { 2696 case OMPC_SCHEDULE_MODIFIER_monotonic: 2697 Modifier = OMP_sch_modifier_monotonic; 2698 break; 2699 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2700 Modifier = OMP_sch_modifier_nonmonotonic; 2701 break; 2702 case OMPC_SCHEDULE_MODIFIER_simd: 2703 if (Schedule == OMP_sch_static_chunked) 2704 Schedule = OMP_sch_static_balanced_chunked; 2705 break; 2706 case OMPC_SCHEDULE_MODIFIER_last: 2707 case OMPC_SCHEDULE_MODIFIER_unknown: 2708 break; 2709 } 2710 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. 2711 // If the static schedule kind is specified or if the ordered clause is 2712 // specified, and if the nonmonotonic modifier is not specified, the effect is 2713 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2714 // modifier is specified, the effect is as if the nonmonotonic modifier is 2715 // specified.
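  // For example, with -fopenmp-version=50 a plain "schedule(dynamic)" gets the
  // nonmonotonic modifier bit added below, while the static (and ordered
  // static) schedules are left unmodified.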
2716 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2717 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2718 Schedule == OMP_sch_static_balanced_chunked || 2719 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2720 Schedule == OMP_dist_sch_static_chunked || 2721 Schedule == OMP_dist_sch_static)) 2722 Modifier = OMP_sch_modifier_nonmonotonic; 2723 } 2724 return Schedule | Modifier; 2725} 2726 2727void CGOpenMPRuntime::emitForDispatchInit( 2728 CodeGenFunction &CGF, SourceLocation Loc, 2729 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2730 bool Ordered, const DispatchRTInput &DispatchValues) { 2731 if (!CGF.HaveInsertPoint()) 2732 return; 2733 OpenMPSchedType Schedule = getRuntimeSchedule( 2734 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2735 assert(Ordered || 2736 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2737 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2738 Schedule != OMP_sch_static_balanced_chunked)); 2739 // Call __kmpc_dispatch_init( 2740 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2741 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2742 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2743 2744 // If the Chunk was not specified in the clause - use default value 1. 2745 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2746 : CGF.Builder.getIntN(IVSize, 1); 2747 llvm::Value *Args[] = { 2748 emitUpdateLocation(CGF, Loc), 2749 getThreadID(CGF, Loc), 2750 CGF.Builder.getInt32(addMonoNonMonoModifier( 2751 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2752 DispatchValues.LB, // Lower 2753 DispatchValues.UB, // Upper 2754 CGF.Builder.getIntN(IVSize, 1), // Stride 2755 Chunk // Chunk 2756 }; 2757 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2758} 2759 2760static void emitForStaticInitCall( 2761 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2762 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2763 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2764 const CGOpenMPRuntime::StaticRTInput &Values) { 2765 if (!CGF.HaveInsertPoint()) 2766 return; 2767 2768 assert(!Values.Ordered); 2769 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2770 Schedule == OMP_sch_static_balanced_chunked || 2771 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2772 Schedule == OMP_dist_sch_static || 2773 Schedule == OMP_dist_sch_static_chunked); 2774 2775 // Call __kmpc_for_static_init( 2776 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2777 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2778 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2779 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2780 llvm::Value *Chunk = Values.Chunk; 2781 if (Chunk == nullptr) { 2782 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2783 Schedule == OMP_dist_sch_static) && 2784 "expected static non-chunked schedule"); 2785 // If the Chunk was not specified in the clause - use default value 1. 
2786 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2787 } else { 2788 assert((Schedule == OMP_sch_static_chunked || 2789 Schedule == OMP_sch_static_balanced_chunked || 2790 Schedule == OMP_ord_static_chunked || 2791 Schedule == OMP_dist_sch_static_chunked) && 2792 "expected static chunked schedule"); 2793 } 2794 llvm::Value *Args[] = { 2795 UpdateLocation, 2796 ThreadId, 2797 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2798 M2)), // Schedule type 2799 Values.IL.getPointer(), // &isLastIter 2800 Values.LB.getPointer(), // &LB 2801 Values.UB.getPointer(), // &UB 2802 Values.ST.getPointer(), // &Stride 2803 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2804 Chunk // Chunk 2805 }; 2806 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2807} 2808 2809void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2810 SourceLocation Loc, 2811 OpenMPDirectiveKind DKind, 2812 const OpenMPScheduleTy &ScheduleKind, 2813 const StaticRTInput &Values) { 2814 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2815 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2816 assert(isOpenMPWorksharingDirective(DKind) && 2817 "Expected loop-based or sections-based directive."); 2818 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2819 isOpenMPLoopDirective(DKind) 2820 ? OMP_IDENT_WORK_LOOP 2821 : OMP_IDENT_WORK_SECTIONS); 2822 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2823 llvm::FunctionCallee StaticInitFunction = 2824 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2825 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2826 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2827 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2828} 2829 2830void CGOpenMPRuntime::emitDistributeStaticInit( 2831 CodeGenFunction &CGF, SourceLocation Loc, 2832 OpenMPDistScheduleClauseKind SchedKind, 2833 const CGOpenMPRuntime::StaticRTInput &Values) { 2834 OpenMPSchedType ScheduleNum = 2835 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2836 llvm::Value *UpdatedLocation = 2837 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2838 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2839 llvm::FunctionCallee StaticInitFunction = 2840 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2841 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2842 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2843 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2844} 2845 2846void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2847 SourceLocation Loc, 2848 OpenMPDirectiveKind DKind) { 2849 if (!CGF.HaveInsertPoint()) 2850 return; 2851 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2852 llvm::Value *Args[] = { 2853 emitUpdateLocation(CGF, Loc, 2854 isOpenMPDistributeDirective(DKind) 2855 ? OMP_IDENT_WORK_DISTRIBUTE 2856 : isOpenMPLoopDirective(DKind) 2857 ? 
OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
2952 KmpTaskTPartId, 2953 /// Function with call of destructors for private variables. 2954 Data1, 2955 /// Task priority. 2956 Data2, 2957 /// (Taskloops only) Lower bound. 2958 KmpTaskTLowerBound, 2959 /// (Taskloops only) Upper bound. 2960 KmpTaskTUpperBound, 2961 /// (Taskloops only) Stride. 2962 KmpTaskTStride, 2963 /// (Taskloops only) Is last iteration flag. 2964 KmpTaskTLastIter, 2965 /// (Taskloops only) Reduction data. 2966 KmpTaskTReductions, 2967}; 2968} // anonymous namespace 2969 2970bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2971 return OffloadEntriesTargetRegion.empty() && 2972 OffloadEntriesDeviceGlobalVar.empty(); 2973} 2974 2975/// Initialize target region entry. 2976void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2977 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2978 StringRef ParentName, unsigned LineNum, 2979 unsigned Order) { 2980 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2981 "only required for the device " 2982 "code generation."); 2983 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2984 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2985 OMPTargetRegionEntryTargetRegion); 2986 ++OffloadingEntriesNum; 2987} 2988 2989void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2990 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2991 StringRef ParentName, unsigned LineNum, 2992 llvm::Constant *Addr, llvm::Constant *ID, 2993 OMPTargetRegionEntryKind Flags) { 2994 // If we are emitting code for a target, the entry is already initialized, 2995 // only has to be registered. 2996 if (CGM.getLangOpts().OpenMPIsDevice) { 2997 // This could happen if the device compilation is invoked standalone. 2998 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 2999 return; 3000 auto &Entry = 3001 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3002 Entry.setAddress(Addr); 3003 Entry.setID(ID); 3004 Entry.setFlags(Flags); 3005 } else { 3006 if (Flags == 3007 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3008 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3009 /*IgnoreAddressId*/ true)) 3010 return; 3011 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3012 "Target region entry already registered!"); 3013 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3014 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3015 ++OffloadingEntriesNum; 3016 } 3017} 3018 3019bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3020 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3021 bool IgnoreAddressId) const { 3022 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3023 if (PerDevice == OffloadEntriesTargetRegion.end()) 3024 return false; 3025 auto PerFile = PerDevice->second.find(FileID); 3026 if (PerFile == PerDevice->second.end()) 3027 return false; 3028 auto PerParentName = PerFile->second.find(ParentName); 3029 if (PerParentName == PerFile->second.end()) 3030 return false; 3031 auto PerLine = PerParentName->second.find(LineNum); 3032 if (PerLine == PerParentName->second.end()) 3033 return false; 3034 // Fail if this entry is already registered. 
3035 if (!IgnoreAddressId && 3036 (PerLine->second.getAddress() || PerLine->second.getID())) 3037 return false; 3038 return true; 3039} 3040 3041void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3042 const OffloadTargetRegionEntryInfoActTy &Action) { 3043 // Scan all target region entries and perform the provided action. 3044 for (const auto &D : OffloadEntriesTargetRegion) 3045 for (const auto &F : D.second) 3046 for (const auto &P : F.second) 3047 for (const auto &L : P.second) 3048 Action(D.first, F.first, P.first(), L.first, L.second); 3049} 3050 3051void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3052 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3053 OMPTargetGlobalVarEntryKind Flags, 3054 unsigned Order) { 3055 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3056 "only required for the device " 3057 "code generation."); 3058 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3059 ++OffloadingEntriesNum; 3060} 3061 3062void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3063 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3064 CharUnits VarSize, 3065 OMPTargetGlobalVarEntryKind Flags, 3066 llvm::GlobalValue::LinkageTypes Linkage) { 3067 if (CGM.getLangOpts().OpenMPIsDevice) { 3068 // This could happen if the device compilation is invoked standalone. 3069 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3070 return; 3071 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3072 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3073 if (Entry.getVarSize().isZero()) { 3074 Entry.setVarSize(VarSize); 3075 Entry.setLinkage(Linkage); 3076 } 3077 return; 3078 } 3079 Entry.setVarSize(VarSize); 3080 Entry.setLinkage(Linkage); 3081 Entry.setAddress(Addr); 3082 } else { 3083 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3084 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3085 assert(Entry.isValid() && Entry.getFlags() == Flags && 3086 "Entry not initialized!"); 3087 if (Entry.getVarSize().isZero()) { 3088 Entry.setVarSize(VarSize); 3089 Entry.setLinkage(Linkage); 3090 } 3091 return; 3092 } 3093 OffloadEntriesDeviceGlobalVar.try_emplace( 3094 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3095 ++OffloadingEntriesNum; 3096 } 3097} 3098 3099void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3100 actOnDeviceGlobalVarEntriesInfo( 3101 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3102 // Scan all target region entries and perform the provided action. 3103 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3104 Action(E.getKey(), E.getValue()); 3105} 3106 3107void CGOpenMPRuntime::createOffloadEntry( 3108 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3109 llvm::GlobalValue::LinkageTypes Linkage) { 3110 StringRef Name = Addr->getName(); 3111 llvm::Module &M = CGM.getModule(); 3112 llvm::LLVMContext &C = M.getContext(); 3113 3114 // Create constant string with the name. 
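  // For an entry named "foo" the code below produces roughly (sketch; the
  // exact symbol names depend on the separators used by getName):
  //
  //   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
  //   @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry
  //       { i8* <ID>, i8* <name>, i64 <size>, i32 <flags>, i32 0 },
  //       section "omp_offloading_entries"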
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
      llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
      llvm::ConstantInt::get(CGM.SizeTy, Size),
      llvm::ConstantInt::get(CGM.Int32Ty, Flags),
      llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create a function that emits metadata for each target region entry:
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
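        // E.g., with illustrative values:
        //   !{i32 0, i32 42, i32 204, !"_Z3foov", i32 17, i32 0}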
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create a function that emits metadata for each device global variable
  // entry:
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
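  // E.g., the host IR may carry (illustrative values only):
  //
  //   !omp_offload.info = !{!0, !1}
  //   !0 = !{i32 0, i32 42, i32 204, !"_Z3foov", i32 17, i32 0}
  //   !1 = !{i32 1, !"global_var", i32 <declare-target-kind>, i32 1}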

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry {
  //   void *addr;       // Pointer to the offload entry info
  //                     // (function or global).
  //   char *name;       // Name of the function or global.
  //   size_t size;      // Size of the entry info (0 if it is a function).
  //   int32_t flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t reserved; // Reserved, to be used by the runtime library.
3403 // }; 3404 if (TgtOffloadEntryQTy.isNull()) { 3405 ASTContext &C = CGM.getContext(); 3406 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3407 RD->startDefinition(); 3408 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3409 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3410 addFieldToRecordDecl(C, RD, C.getSizeType()); 3411 addFieldToRecordDecl( 3412 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3413 addFieldToRecordDecl( 3414 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3415 RD->completeDefinition(); 3416 RD->addAttr(PackedAttr::CreateImplicit(C)); 3417 TgtOffloadEntryQTy = C.getRecordType(RD); 3418 } 3419 return TgtOffloadEntryQTy; 3420} 3421 3422namespace { 3423struct PrivateHelpersTy { 3424 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3425 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3426 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3427 PrivateElemInit(PrivateElemInit) {} 3428 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3429 const Expr *OriginalRef = nullptr; 3430 const VarDecl *Original = nullptr; 3431 const VarDecl *PrivateCopy = nullptr; 3432 const VarDecl *PrivateElemInit = nullptr; 3433 bool isLocalPrivate() const { 3434 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3435 } 3436}; 3437typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3438} // anonymous namespace 3439 3440static bool isAllocatableDecl(const VarDecl *VD) { 3441 const VarDecl *CVD = VD->getCanonicalDecl(); 3442 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3443 return false; 3444 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3445 // Use the default allocation. 3446 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3447 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3448 !AA->getAllocator()); 3449} 3450 3451static RecordDecl * 3452createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3453 if (!Privates.empty()) { 3454 ASTContext &C = CGM.getContext(); 3455 // Build struct .kmp_privates_t. { 3456 // /* private vars */ 3457 // }; 3458 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3459 RD->startDefinition(); 3460 for (const auto &Pair : Privates) { 3461 const VarDecl *VD = Pair.second.Original; 3462 QualType Type = VD->getType().getNonReferenceType(); 3463 // If the private variable is a local variable with lvalue ref type, 3464 // allocate the pointer instead of the pointee type. 
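      // E.g., a local 'int &Ref' contributes an 'int *' field, as does an
      // allocatable 'int X' (a pointer into allocator-provided storage).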
3465 if (Pair.second.isLocalPrivate()) { 3466 if (VD->getType()->isLValueReferenceType()) 3467 Type = C.getPointerType(Type); 3468 if (isAllocatableDecl(VD)) 3469 Type = C.getPointerType(Type); 3470 } 3471 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3472 if (VD->hasAttrs()) { 3473 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3474 E(VD->getAttrs().end()); 3475 I != E; ++I) 3476 FD->addAttr(*I); 3477 } 3478 } 3479 RD->completeDefinition(); 3480 return RD; 3481 } 3482 return nullptr; 3483} 3484 3485static RecordDecl * 3486createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3487 QualType KmpInt32Ty, 3488 QualType KmpRoutineEntryPointerQTy) { 3489 ASTContext &C = CGM.getContext(); 3490 // Build struct kmp_task_t { 3491 // void * shareds; 3492 // kmp_routine_entry_t routine; 3493 // kmp_int32 part_id; 3494 // kmp_cmplrdata_t data1; 3495 // kmp_cmplrdata_t data2; 3496 // For taskloops additional fields: 3497 // kmp_uint64 lb; 3498 // kmp_uint64 ub; 3499 // kmp_int64 st; 3500 // kmp_int32 liter; 3501 // void * reductions; 3502 // }; 3503 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3504 UD->startDefinition(); 3505 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3506 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3507 UD->completeDefinition(); 3508 QualType KmpCmplrdataTy = C.getRecordType(UD); 3509 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3510 RD->startDefinition(); 3511 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3512 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3513 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3514 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3515 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3516 if (isOpenMPTaskLoopDirective(Kind)) { 3517 QualType KmpUInt64Ty = 3518 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3519 QualType KmpInt64Ty = 3520 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3521 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3522 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3523 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3524 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3525 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3526 } 3527 RD->completeDefinition(); 3528 return RD; 3529} 3530 3531static RecordDecl * 3532createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3533 ArrayRef<PrivateDataTy> Privates) { 3534 ASTContext &C = CGM.getContext(); 3535 // Build struct kmp_task_t_with_privates { 3536 // kmp_task_t task_data; 3537 // .kmp_privates_t. privates; 3538 // }; 3539 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3540 RD->startDefinition(); 3541 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3542 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3543 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3544 RD->completeDefinition(); 3545 return RD; 3546} 3547 3548/// Emit a proxy function which accepts kmp_task_t as the second 3549/// argument. 
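/// The entry unpacks the task descriptor and forwards its fields to the
/// outlined task body: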
3550/// \code 3551/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3552/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3553/// For taskloops: 3554/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3555/// tt->reductions, tt->shareds); 3556/// return 0; 3557/// } 3558/// \endcode 3559static llvm::Function * 3560emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3561 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3562 QualType KmpTaskTWithPrivatesPtrQTy, 3563 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3564 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3565 llvm::Value *TaskPrivatesMap) { 3566 ASTContext &C = CGM.getContext(); 3567 FunctionArgList Args; 3568 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3569 ImplicitParamDecl::Other); 3570 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3571 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3572 ImplicitParamDecl::Other); 3573 Args.push_back(&GtidArg); 3574 Args.push_back(&TaskTypeArg); 3575 const auto &TaskEntryFnInfo = 3576 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3577 llvm::FunctionType *TaskEntryTy = 3578 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3579 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3580 auto *TaskEntry = llvm::Function::Create( 3581 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3582 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3583 TaskEntry->setDoesNotRecurse(); 3584 CodeGenFunction CGF(CGM); 3585 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3586 Loc, Loc); 3587 3588 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3589 // tt, 3590 // For taskloops: 3591 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3592 // tt->task_data.shareds); 3593 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3594 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3595 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3596 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3597 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3598 const auto *KmpTaskTWithPrivatesQTyRD = 3599 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3600 LValue Base = 3601 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3602 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3603 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3604 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3605 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3606 3607 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3608 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3609 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3610 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3611 CGF.ConvertTypeForMem(SharedsPtrTy)); 3612 3613 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3614 llvm::Value *PrivatesParam; 3615 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3616 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3617 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3618 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3619 } else { 3620 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3621 } 3622 
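  // Gather the arguments shared by 'task' and 'taskloop' entries; the
  // taskloop bounds, stride, lastprivate flag and reduction data are
  // appended below.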
3623 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3624 TaskPrivatesMap, 3625 CGF.Builder 3626 .CreatePointerBitCastOrAddrSpaceCast( 3627 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3628 .getPointer()}; 3629 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3630 std::end(CommonArgs)); 3631 if (isOpenMPTaskLoopDirective(Kind)) { 3632 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3633 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3634 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3635 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3636 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3637 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3638 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3639 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3640 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3641 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3642 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3643 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3644 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3645 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3646 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3647 CallArgs.push_back(LBParam); 3648 CallArgs.push_back(UBParam); 3649 CallArgs.push_back(StParam); 3650 CallArgs.push_back(LIParam); 3651 CallArgs.push_back(RParam); 3652 } 3653 CallArgs.push_back(SharedsParam); 3654 3655 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3656 CallArgs); 3657 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3658 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3659 CGF.FinishFunction(); 3660 return TaskEntry; 3661} 3662 3663static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3664 SourceLocation Loc, 3665 QualType KmpInt32Ty, 3666 QualType KmpTaskTWithPrivatesPtrQTy, 3667 QualType KmpTaskTWithPrivatesQTy) { 3668 ASTContext &C = CGM.getContext(); 3669 FunctionArgList Args; 3670 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3671 ImplicitParamDecl::Other); 3672 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3673 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3674 ImplicitParamDecl::Other); 3675 Args.push_back(&GtidArg); 3676 Args.push_back(&TaskTypeArg); 3677 const auto &DestructorFnInfo = 3678 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3679 llvm::FunctionType *DestructorFnTy = 3680 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3681 std::string Name = 3682 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3683 auto *DestructorFn = 3684 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3685 Name, &CGM.getModule()); 3686 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3687 DestructorFnInfo); 3688 DestructorFn->setDoesNotRecurse(); 3689 CodeGenFunction CGF(CGM); 3690 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3691 Args, Loc, Loc); 3692 3693 LValue Base = CGF.EmitLoadOfPointerLValue( 3694 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3695 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3696 const auto *KmpTaskTWithPrivatesQTyRD = 3697 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3698 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3699 Base = CGF.EmitLValueForField(Base, *FI); 3700 for (const auto *Field 
: 3701 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3702 if (QualType::DestructionKind DtorKind = 3703 Field->getType().isDestructedType()) { 3704 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3705 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3706 } 3707 } 3708 CGF.FinishFunction(); 3709 return DestructorFn; 3710} 3711 3712/// Emit a privates mapping function for correct handling of private and 3713/// firstprivate variables. 3714/// \code 3715/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3716/// **noalias priv1,..., <tyn> **noalias privn) { 3717/// *priv1 = &.privates.priv1; 3718/// ...; 3719/// *privn = &.privates.privn; 3720/// } 3721/// \endcode 3722static llvm::Value * 3723emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3724 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3725 ArrayRef<PrivateDataTy> Privates) { 3726 ASTContext &C = CGM.getContext(); 3727 FunctionArgList Args; 3728 ImplicitParamDecl TaskPrivatesArg( 3729 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3730 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3731 ImplicitParamDecl::Other); 3732 Args.push_back(&TaskPrivatesArg); 3733 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3734 unsigned Counter = 1; 3735 for (const Expr *E : Data.PrivateVars) { 3736 Args.push_back(ImplicitParamDecl::Create( 3737 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3738 C.getPointerType(C.getPointerType(E->getType())) 3739 .withConst() 3740 .withRestrict(), 3741 ImplicitParamDecl::Other)); 3742 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3743 PrivateVarsPos[VD] = Counter; 3744 ++Counter; 3745 } 3746 for (const Expr *E : Data.FirstprivateVars) { 3747 Args.push_back(ImplicitParamDecl::Create( 3748 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3749 C.getPointerType(C.getPointerType(E->getType())) 3750 .withConst() 3751 .withRestrict(), 3752 ImplicitParamDecl::Other)); 3753 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3754 PrivateVarsPos[VD] = Counter; 3755 ++Counter; 3756 } 3757 for (const Expr *E : Data.LastprivateVars) { 3758 Args.push_back(ImplicitParamDecl::Create( 3759 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3760 C.getPointerType(C.getPointerType(E->getType())) 3761 .withConst() 3762 .withRestrict(), 3763 ImplicitParamDecl::Other)); 3764 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3765 PrivateVarsPos[VD] = Counter; 3766 ++Counter; 3767 } 3768 for (const VarDecl *VD : Data.PrivateLocals) { 3769 QualType Ty = VD->getType().getNonReferenceType(); 3770 if (VD->getType()->isLValueReferenceType()) 3771 Ty = C.getPointerType(Ty); 3772 if (isAllocatableDecl(VD)) 3773 Ty = C.getPointerType(Ty); 3774 Args.push_back(ImplicitParamDecl::Create( 3775 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3776 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3777 ImplicitParamDecl::Other)); 3778 PrivateVarsPos[VD] = Counter; 3779 ++Counter; 3780 } 3781 const auto &TaskPrivatesMapFnInfo = 3782 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3783 llvm::FunctionType *TaskPrivatesMapTy = 3784 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3785 std::string Name = 3786 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3787 auto *TaskPrivatesMap = llvm::Function::Create( 3788 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3789 &CGM.getModule()); 3790 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3791 
TaskPrivatesMapFnInfo); 3792 if (CGM.getLangOpts().Optimize) { 3793 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3794 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3795 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3796 } 3797 CodeGenFunction CGF(CGM); 3798 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3799 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3800 3801 // *privi = &.privates.privi; 3802 LValue Base = CGF.EmitLoadOfPointerLValue( 3803 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3804 TaskPrivatesArg.getType()->castAs<PointerType>()); 3805 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3806 Counter = 0; 3807 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3808 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3809 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3810 LValue RefLVal = 3811 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3812 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3813 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3814 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3815 ++Counter; 3816 } 3817 CGF.FinishFunction(); 3818 return TaskPrivatesMap; 3819} 3820 3821/// Emit initialization for private variables in task-based directives. 3822static void emitPrivatesInit(CodeGenFunction &CGF, 3823 const OMPExecutableDirective &D, 3824 Address KmpTaskSharedsPtr, LValue TDBase, 3825 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3826 QualType SharedsTy, QualType SharedsPtrTy, 3827 const OMPTaskDataTy &Data, 3828 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3829 ASTContext &C = CGF.getContext(); 3830 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3831 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3832 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3833 ? OMPD_taskloop 3834 : OMPD_task; 3835 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3836 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3837 LValue SrcBase; 3838 bool IsTargetTask = 3839 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3840 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3841 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3842 // PointersArray, SizesArray, and MappersArray. The original variables for 3843 // these arrays are not captured and we get their addresses explicitly. 3844 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3845 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3846 SrcBase = CGF.MakeAddrLValue( 3847 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3848 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3849 SharedsTy); 3850 } 3851 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3852 for (const PrivateDataTy &Pair : Privates) { 3853 // Do not initialize private locals. 3854 if (Pair.second.isLocalPrivate()) { 3855 ++FI; 3856 continue; 3857 } 3858 const VarDecl *VD = Pair.second.PrivateCopy; 3859 const Expr *Init = VD->getAnyInitializer(); 3860 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3861 !CGF.isTrivialInitializer(Init)))) { 3862 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3863 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3864 const VarDecl *OriginalVD = Pair.second.Original; 3865 // Check if the variable is the target-based BasePointersArray, 3866 // PointersArray, SizesArray, or MappersArray. 
3867 LValue SharedRefLValue; 3868 QualType Type = PrivateLValue.getType(); 3869 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3870 if (IsTargetTask && !SharedField) { 3871 assert(isa<ImplicitParamDecl>(OriginalVD) && 3872 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3873 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3874 ->getNumParams() == 0 && 3875 isa<TranslationUnitDecl>( 3876 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3877 ->getDeclContext()) && 3878 "Expected artificial target data variable."); 3879 SharedRefLValue = 3880 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3881 } else if (ForDup) { 3882 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3883 SharedRefLValue = CGF.MakeAddrLValue( 3884 Address(SharedRefLValue.getPointer(CGF), 3885 C.getDeclAlign(OriginalVD)), 3886 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3887 SharedRefLValue.getTBAAInfo()); 3888 } else if (CGF.LambdaCaptureFields.count( 3889 Pair.second.Original->getCanonicalDecl()) > 0 || 3890 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3891 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3892 } else { 3893 // Processing for implicitly captured variables. 3894 InlinedOpenMPRegionRAII Region( 3895 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3896 /*HasCancel=*/false, /*NoInheritance=*/true); 3897 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3898 } 3899 if (Type->isArrayType()) { 3900 // Initialize firstprivate array. 3901 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3902 // Perform simple memcpy. 3903 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3904 } else { 3905 // Initialize firstprivate array using element-by-element 3906 // initialization. 3907 CGF.EmitOMPAggregateAssign( 3908 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3909 Type, 3910 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3911 Address SrcElement) { 3912 // Clean up any temporaries needed by the initialization. 3913 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3914 InitScope.addPrivate( 3915 Elem, [SrcElement]() -> Address { return SrcElement; }); 3916 (void)InitScope.Privatize(); 3917 // Emit initialization for single element. 3918 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3919 CGF, &CapturesInfo); 3920 CGF.EmitAnyExprToMem(Init, DestElement, 3921 Init->getType().getQualifiers(), 3922 /*IsInitializer=*/false); 3923 }); 3924 } 3925 } else { 3926 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3927 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3928 return SharedRefLValue.getAddress(CGF); 3929 }); 3930 (void)InitScope.Privatize(); 3931 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3932 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3933 /*capturedByInit=*/false); 3934 } 3935 } else { 3936 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3937 } 3938 } 3939 ++FI; 3940 } 3941} 3942 3943/// Check if duplication function is required for taskloops. 
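/// (i.e. whether any private copy is initialized with a non-trivial
/// constructor call).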
3944static bool checkInitIsRequired(CodeGenFunction &CGF, 3945 ArrayRef<PrivateDataTy> Privates) { 3946 bool InitRequired = false; 3947 for (const PrivateDataTy &Pair : Privates) { 3948 if (Pair.second.isLocalPrivate()) 3949 continue; 3950 const VarDecl *VD = Pair.second.PrivateCopy; 3951 const Expr *Init = VD->getAnyInitializer(); 3952 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3953 !CGF.isTrivialInitializer(Init)); 3954 if (InitRequired) 3955 break; 3956 } 3957 return InitRequired; 3958} 3959 3960 3961/// Emit task_dup function (for initialization of 3962/// private/firstprivate/lastprivate vars and last_iter flag) 3963/// \code 3964/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3965/// lastpriv) { 3966/// // setup lastprivate flag 3967/// task_dst->last = lastpriv; 3968/// // could be constructor calls here... 3969/// } 3970/// \endcode 3971static llvm::Value * 3972emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3973 const OMPExecutableDirective &D, 3974 QualType KmpTaskTWithPrivatesPtrQTy, 3975 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3976 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3977 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3978 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3979 ASTContext &C = CGM.getContext(); 3980 FunctionArgList Args; 3981 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3982 KmpTaskTWithPrivatesPtrQTy, 3983 ImplicitParamDecl::Other); 3984 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3985 KmpTaskTWithPrivatesPtrQTy, 3986 ImplicitParamDecl::Other); 3987 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3988 ImplicitParamDecl::Other); 3989 Args.push_back(&DstArg); 3990 Args.push_back(&SrcArg); 3991 Args.push_back(&LastprivArg); 3992 const auto &TaskDupFnInfo = 3993 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3994 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3995 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3996 auto *TaskDup = llvm::Function::Create( 3997 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3998 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3999 TaskDup->setDoesNotRecurse(); 4000 CodeGenFunction CGF(CGM); 4001 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4002 Loc); 4003 4004 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4005 CGF.GetAddrOfLocalVar(&DstArg), 4006 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4007 // task_dst->liter = lastpriv; 4008 if (WithLastIter) { 4009 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4010 LValue Base = CGF.EmitLValueForField( 4011 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4012 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4013 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4014 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4015 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4016 } 4017 4018 // Emit initial values for private copies (if any). 
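  // A task duplication function is requested only when at least one private
  // variable must be handled, hence the assertion below.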
4019 assert(!Privates.empty()); 4020 Address KmpTaskSharedsPtr = Address::invalid(); 4021 if (!Data.FirstprivateVars.empty()) { 4022 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4023 CGF.GetAddrOfLocalVar(&SrcArg), 4024 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4025 LValue Base = CGF.EmitLValueForField( 4026 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4027 KmpTaskSharedsPtr = Address( 4028 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4029 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4030 KmpTaskTShareds)), 4031 Loc), 4032 CGM.getNaturalTypeAlignment(SharedsTy)); 4033 } 4034 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4035 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4036 CGF.FinishFunction(); 4037 return TaskDup; 4038} 4039 4040/// Checks if destructor function is required to be generated. 4041/// \return true if cleanups are required, false otherwise. 4042static bool 4043checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4044 ArrayRef<PrivateDataTy> Privates) { 4045 for (const PrivateDataTy &P : Privates) { 4046 if (P.second.isLocalPrivate()) 4047 continue; 4048 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4049 if (Ty.isDestructedType()) 4050 return true; 4051 } 4052 return false; 4053} 4054 4055namespace { 4056/// Loop generator for OpenMP iterator expression. 4057class OMPIteratorGeneratorScope final 4058 : public CodeGenFunction::OMPPrivateScope { 4059 CodeGenFunction &CGF; 4060 const OMPIteratorExpr *E = nullptr; 4061 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4062 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4063 OMPIteratorGeneratorScope() = delete; 4064 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4065 4066public: 4067 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4068 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4069 if (!E) 4070 return; 4071 SmallVector<llvm::Value *, 4> Uppers; 4072 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4073 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4074 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4075 addPrivate(VD, [&CGF, VD]() { 4076 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4077 }); 4078 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4079 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4080 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4081 "counter.addr"); 4082 }); 4083 } 4084 Privatize(); 4085 4086 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4087 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4088 LValue CLVal = 4089 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4090 HelperData.CounterVD->getType()); 4091 // Counter = 0; 4092 CGF.EmitStoreOfScalar( 4093 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4094 CLVal); 4095 CodeGenFunction::JumpDest &ContDest = 4096 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4097 CodeGenFunction::JumpDest &ExitDest = 4098 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4099 // N = <number-of_iterations>; 4100 llvm::Value *N = Uppers[I]; 4101 // cont: 4102 // if (Counter < N) goto body; else goto exit; 4103 CGF.EmitBlock(ContDest.getBlock()); 4104 auto *CVal = 4105 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4106 llvm::Value *Cmp = 4107 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4108 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and the
/// corresponding flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
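  // Sorting by decreasing alignment keeps the generated .kmp_privates.t
  // record densely packed.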
4192 const auto *I = Data.PrivateCopies.begin(); 4193 for (const Expr *E : Data.PrivateVars) { 4194 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4195 Privates.emplace_back( 4196 C.getDeclAlign(VD), 4197 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4198 /*PrivateElemInit=*/nullptr)); 4199 ++I; 4200 } 4201 I = Data.FirstprivateCopies.begin(); 4202 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4203 for (const Expr *E : Data.FirstprivateVars) { 4204 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4205 Privates.emplace_back( 4206 C.getDeclAlign(VD), 4207 PrivateHelpersTy( 4208 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4209 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4210 ++I; 4211 ++IElemInitRef; 4212 } 4213 I = Data.LastprivateCopies.begin(); 4214 for (const Expr *E : Data.LastprivateVars) { 4215 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4216 Privates.emplace_back( 4217 C.getDeclAlign(VD), 4218 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4219 /*PrivateElemInit=*/nullptr)); 4220 ++I; 4221 } 4222 for (const VarDecl *VD : Data.PrivateLocals) { 4223 if (isAllocatableDecl(VD)) 4224 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4225 else 4226 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4227 } 4228 llvm::stable_sort(Privates, 4229 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4230 return L.first > R.first; 4231 }); 4232 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4233 // Build type kmp_routine_entry_t (if not built yet). 4234 emitKmpRoutineEntryT(KmpInt32Ty); 4235 // Build type kmp_task_t (if not built yet). 4236 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4237 if (SavedKmpTaskloopTQTy.isNull()) { 4238 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4239 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4240 } 4241 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4242 } else { 4243 assert((D.getDirectiveKind() == OMPD_task || 4244 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4245 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4246 "Expected taskloop, task or target directive"); 4247 if (SavedKmpTaskTQTy.isNull()) { 4248 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4249 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4250 } 4251 KmpTaskTQTy = SavedKmpTaskTQTy; 4252 } 4253 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4254 // Build particular struct kmp_task_t for the given task. 4255 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4256 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4257 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4258 QualType KmpTaskTWithPrivatesPtrQTy = 4259 C.getPointerType(KmpTaskTWithPrivatesQTy); 4260 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4261 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4262 KmpTaskTWithPrivatesTy->getPointerTo(); 4263 llvm::Value *KmpTaskTWithPrivatesTySize = 4264 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4265 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4266 4267 // Emit initial values for private copies (if any). 
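  // The mapping function emitted below only reports the addresses of the
  // private copies inside the task descriptor; the copies themselves are
  // initialized in emitPrivatesInit().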
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. The format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of the kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if present; otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  //                                                       task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process the list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate the number of elements needed to form the array of affinity
    // data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit the variable-sized array.
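      // (Sketch of the mechanism, illustrative: the ImplicitParamDecl created
      //  below is only a vehicle for CGF.EmitVarDecl() to allocate a
      //  dynamically sized temporary, roughly
      //    %affs.arr = alloca kmp_task_affinity_info_t, i64 %num.elements
      //  with the bound supplied through the OpaqueValueExpr above.)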
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process the elements with iterators.
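    // Illustrative example (hypothetical source):
    //   #pragma omp task affinity(iterator(i=0:n): a[i])
    // produces n affinity entries; OMPIteratorGeneratorScope below emits the
    // iterator loop nest and PosLVal tracks the next free slot in the array.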
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    // kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the call and ignore its result for now, until the runtime
    // function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide a pointer to the function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in the kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates the internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds the flags type.
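/// The implicit record mirrors the runtime's kmp_depend_info layout (sketch;
/// see kmp.h for the authoritative definition):
///   struct kmp_depend_info {
///     kmp_intptr_t base_addr;
///     size_t len;
///     <flags type> flags; // DepIn = 0x1, DepInOut = 0x3,
///                         // DepMutexInOutSet = 0x4
///   };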
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get the number of elements in a single depobj.
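      // (Layout note: emitDepobjDependClause below stores the element count
      //  in the base_addr field of a hidden record at index -1, so the GEP
      //  that follows steps one record back from the user-visible pointer.)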
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // Memcpy the dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process the list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate the number of depobj dependencies and regular deps with
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include the number of iterations, if any.
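    // Hypothetical example:
    //   depend(iterator(i=0:n, j=0:m), in: a[i][j])
    // contributes n*m dependence records; the product of the iterator upper
    // bounds is accumulated below.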
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit the variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy the final depobj arrays without iterators.
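  // (Resulting array order, illustrative: plain dependencies first, then
  //  iterator-generated ones, then the records copied out of each depobj.)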
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process the list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj, reserve one extra element to store the number of elements.
  // This is required to handle the depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use the default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write the number of elements in the first element of the array for depobj.
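  // Illustrative layout of the allocation:
  //   deps[0].base_addr = <number of dependencies>; // hidden header element
  //   deps[1..n]                                    // actual dependence records
  // The address returned to the caller points at deps[1].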
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use the default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
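  // Sketch of the emitted loop (illustrative):
  //   el = begin;
  //   do {
  //     el->flags = <new kind>;
  //     ++el;
  //   } while (el != end);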
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process the list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
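  // Overall shape of the emitted code (sketch):
  //   if (<IfCond>) {
  //     __kmpc_omp_task_with_deps(...);   // or __kmpc_omp_task(...) w/o deps
  //   } else {
  //     __kmpc_omp_wait_deps(...);        // only if dependences are present
  //     __kmpc_omp_task_begin_if0(...);
  //     proxy_task_entry(<gtid>, <new_task>);
  //     __kmpc_omp_task_complete_if0(...);
  //   }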
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list), if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build the return for the
    // untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list), if dependence info is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
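  // (Data.Reductions, when present, carries the task reduction descriptor;
  //  storing it in the kmp_task_t record below lets the runtime make it
  //  available to every task generated for the loop.)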
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform the reduction element by element.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the reduction operation for the current element.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
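/// For instance (hypothetical):
///   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
/// makes ReductionOp a CallExpr whose callee is an OpaqueValueExpr referring
/// to the OMPDeclareReductionDecl; the mapping below substitutes the emitted
/// combiner function before the call expression is emitted.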
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
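      // (The size was stored by emitReduction() as an extra void* slot right
      //  after the VLA's address in the RedList array; it is loaded below and
      //  bound to the VLA size expression through an OpaqueValueMapping.)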
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve a slot for the array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
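  // (Runtime convention, per the libomp sources: __kmpc_reduce{_nowait}
  //  returns 1 when the thread should execute the reduce block directly,
  //  2 when it should use the atomic path, and 0 when nothing more is
  //  needed; hence the two switch cases below.)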

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
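          // E.g. (an illustrative case), for reduction(min : x) the combiner
          // has the form "x = x < e ? x : e"; taking the condition "x < e"
          // exposes the comparison opcode and the operand "e" that are needed
          // to emit an atomic min update below.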
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" <Decl_start_loc_raw_enc>
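/// For example (a hypothetical instance of the format above), the size
/// variable for a VLA reduction item "a" might be named something like
/// "reduction_size.a_276".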
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
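  //
  // A construct such as the following (an illustrative example, not taken
  // from this file) reaches this entry point:
  //
  //   #pragma omp taskgroup task_reduction(+ : x)
  //   { /* tasks with in_reduction(+ : x) */ }
  //
  // One kmp_taskred_input_t record is filled in per reduction item.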
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and to make them available in those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

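// Illustrative note: the __kmpc_taskred_modifier_init path above corresponds
// to a reduction clause carrying the 'task' modifier, e.g.
//   #pragma omp parallel reduction(task, + : x)
// whereas the plain __kmpc_taskred_init path serves 'taskgroup' directives
// with a task_reduction clause.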
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable holding the size if the size is
  // non-constant (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

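// Illustrative note: an allocator with a traits array typically originates
// from a construct such as the following (an example, not from this file):
//   omp_alloctrait_t Traits[] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t MyAlloc;
//   #pragma omp target uses_allocators(MyAlloc(Traits))
// The Enter/Exit actions above bracket the target region body with
// __kmpc_init_allocator / __kmpc_destroy_allocator calls.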
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.
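  //
  // For instance (hypothetical IDs), a target region at line 27 of a function
  // "foo" might produce an entry named "__omp_offloading_10303_b2c41_foo_l27".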

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so the compiler
  // does not need to keep it alive and could therefore inline the host
  // function if that proves worthwhile during optimization. On the other
  // hand, if emitting code for the device, the ID has to be the function
  // address so that it can be retrieved from the offloading entry and
  // launched by the runtime library. We also mark the outlined function to
  // have external linkage in case we are emitting code for the device,
  // because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
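///
/// For instance (an illustrative example), for
///   #pragma omp target teams num_teams(8)
/// this emits the value 8; for '#pragma omp target parallel' it emits 1; and
/// a teams construct without a num_teams clause yields 0, i.e. "use the
/// runtime default".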
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If it is present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause if the if clause was not
      // specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
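///
/// For instance (an illustrative example), for
///   #pragma omp target parallel if(parallel : c) num_threads(n)
/// the emitted value is roughly "c ? n : 1", with n additionally clamped by a
/// thread_limit clause when one is present.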
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
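
  // Note: for the OMP_MAP_MEMBER_OF mask above (low 48 bits clear), the loop
  // returns 48; i.e. member positions are encoded in the top 16 bits of the
  // 64-bit flag word, consistent with the enum documentation.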
  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };
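  // Illustrative note: the arrays in MapCombinedInfoTy are parallel - index i
  // of Exprs, BasePointers, Pointers, Sizes, Types, and Mappers all describe
  // the same map entry, which is why append() extends them in lockstep.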
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
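  // Illustrative note (worked examples for the size computation above, using
  // the declarations from the comment block further below):
  //   int i[100];  map(i[1:23])  yields 23 * sizeof(int);
  //   int i[100];  map(i[3:])    yields (100 - 3) * sizeof(int), clamped to
  //   zero if the lower bound exceeds the array size.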
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library:
      // if we don't pass any bits, alloc/release is what the runtime is going
      // to do. Therefore, we don't need to signal anything for these two type
      // modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) !=
        MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) !=
        MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
            MapModifiers.end() ||
        llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
            MotionModifiers.end())
      Bits |= OMP_MAP_PRESENT;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
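  // Illustrative note (a sketch of getMapTypeBits, not normative): for
  //   map(always, close, tofrom: x)
  // the computed bits are OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
  // OMP_MAP_CLOSE, and a 'present' map or motion modifier additionally sets
  // OMP_MAP_PRESENT.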
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }
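      // Illustrative note: for a variable declared with
      //   #pragma omp declare target link(gv)
      // (or 'to(gv)' under unified shared memory), BP is redirected above to
      // the runtime-managed reference for gv rather than to gv itself.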
      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark
      // it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a pointer,
      // it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that; otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit, /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }
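        // Illustrative note: this matches the examples in the block comment
        // above - intermediate pointer links, e.g. the MEMBER_OF(1) |
        // PTR_AND_OBJ entries in map(s.ps->ps->ps), carry no TO/FROM bits;
        // only the final pointee keeps the motion flags.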
        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list, allocate the space for the
    // whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension, since we
      // don't need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims. Instead, they
    // match the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to
          // ⌈(size − lower-bound)/stride⌉, where size is the size of the
          // array dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1        4    (int)    <- dummy dimension
      //    D1          0      2        8    (2 * (1) * 4)
      //    D2          1      2        20   (1 * (1 * 5) * 4)
      //    D3          0      2        200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
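  // Illustrative note (sketch, following the stride table above): for
  //   #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])
  // the runtime receives per-dimension {offset, count, stride} descriptors
  // instead of a single contiguous {pointer, size} pair, with the dummy unit
  // dimension D0 always prepended.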
  /// Return the adjusted map modifiers if the declaration a capture refers
  /// to appears in a first-private clause. This is expected to be used only
  /// with directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
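  // Illustrative note: getMemberOfFlag(0) encodes MEMBER_OF(1), i.e.
  // (uint64_t)1 << 48, matching the MEMBER_OF(n) notation used in the
  // examples above; the all-ones pattern 0xFFFF in that field is the
  // placeholder that setCorrectMemberOfFlag patches.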
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended
  /// to the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
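    // Illustrative note: the per-declaration buckets declared above are later
    // iterated in enum order, so entries from 'present' clauses are emitted
    // first, then 'alloc' maps, then everything else (a sketch of the
    // intended ordering, per the MapKind enum).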
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMapTypeModifiers().empty() &&
          llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
            return K == OMPC_MAP_MODIFIER_present;
          }))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (!C->getMotionModifiers().empty() &&
          llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
            return K == OMPC_MOTION_MODIFIER_present;
          }))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                llvm::None, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as an array subscript, array
            // section or array shaping. The base address is passed as a
            // pointer to the base in this case and cannot be used as a base
            // for a use_device_ptr list item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }
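    // Illustrative note (sketch, not normative): for 'use_device_ptr(p)' with
    // no enclosing map of p, the entry generated above is roughly
    //   base = p, ptr = p, size = 0, type = RETURN_PARAM
    // i.e. a zero-size section whose only purpose is to have the runtime
    // return the translated device pointer.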
    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }
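    // Example (illustrative only):
    //   int x[10];
    //   #pragma omp target data map(tofrom: x) use_device_addr(x)
    // The existing map entry for 'x' is marked ReturnDevicePointer, so the
    // device address of 'x' is returned for use inside the region.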
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is TARGET_PARAM only when we are generating info for the
    // captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
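  // For instance (illustrative only), for
  //   struct S { int a; double b; } s;
  //   #pragma omp target map(to: s.a) map(from: s.b)
  // a combined entry covering the range [&s.a, &s.b + 1) is emitted, and the
  // entries for 's.a' and 's.b' are rewritten to be MEMBER_OF that entry.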
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }
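  // For example (illustrative only), given
  //   #pragma omp declare mapper(id: struct vec v) map(v.len, v.data[0:v.len])
  // generateAllInfoForMapper collects the map information for 'v.len' and
  // 'v.data[0:v.len]' that the generated mapper function registers with the
  // runtime.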
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
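  // Example (illustrative only): for
  //   int x = 0;
  //   auto l = [&x]() { return x; };
  //   #pragma omp target map(to: l)
  // the capture field of 'x' inside 'l' is emitted PTR_AND_OBJ and MEMBER_OF
  // the lambda object, so the device copy of 'l' refers to the device copy
  // of 'x'.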
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
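    // The net effect of this ordering (illustrative only): for
    //   #pragma omp target map(present, to: s.a) map(alloc: s.b)
    // the component list carrying the 'present' modifier is processed before
    // the others and the 'alloc' one last, so the 'present' semantics take
    // precedence when the entries are combined.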
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We have found an overlap if, for at least one of the lists, we
        // reached the head of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and the other one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlap.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
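    // Example (illustrative only):
    //   #pragma omp target map(tofrom: s) map(to: s.a)
    // The component lists of 's' and 's.a' overlap; the shorter list ('s')
    // becomes the base entry and the list of 's.a' is recorded as one of its
    // overlapped elements.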
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime which captures are passed by value
        // and are not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use the new global variable as the base pointer.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
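    // For instance (illustrative only), a scalar 'int x' captured by
    // reference with no explicit map clause is emitted with size sizeof(int),
    // the default 'to' map type for scalars, and the TARGET_PARAM and
    // IMPLICIT flags set.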
    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components; however, the size of offset, count, and stride is
  // equal to the size of the base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
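// For example (illustrative only), a strided update such as
//   #pragma omp target update to(a[0:4:2])
// is described by one descriptor_dim entry {offset, count, stride} per
// dimension, and the corresponding pointers-array slot is redirected to the
// temporary "dims" array built above.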
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  llvm::Constant *SrcLocStr;
  if (!MapExprs.getMapDecl()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string ExprName = "";
    if (MapExprs.getMapExpr()) {
      PrintingPolicy P(CGF.getContext().getLangOpts());
      llvm::raw_string_ostream OS(ExprName);
      MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
      OS.flush();
    } else {
      ExprName = MapExprs.getMapDecl()->getNameAsString();
    }

    SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
                                                Line, Column);
  }
  return SrcLocStr;
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The mapping information (names) is only built if debug information is
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }
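    // For instance (illustrative only), a directive with 'map(tofrom: x)'
    // where 'x' is a 4-byte int contributes the constant 4 to .offload_sizes
    // and the TO and FROM bits plus the TARGET_PARAM bit to .offload_maptypes.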
    // If there's a present map type modifier, it must not be applied to the
    // end of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }
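    // E.g. (illustrative only), for 'map(present, tofrom: x)' on a
    // 'target data' region, the region-begin call keeps the PRESENT bit while
    // the matching region-end call uses a second map type array with the bit
    // cleared.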
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the map names array argument if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid unnecessary data privatization.
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for an inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
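// For instance (illustrative only), given
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (...) {}
// the function above looks through the 'teams' region and returns the
// 'distribute parallel for' directive as the nested distribute directive of
// the 'target' region.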
9652/// \code 9653/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9654/// void *base, void *begin, 9655/// int64_t size, int64_t type, 9656/// void *name = nullptr) { 9657/// // Allocate space for an array section first or add a base/begin for 9658/// // pointer dereference. 9659/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9660/// !maptype.IsDelete) 9661/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9662/// size*sizeof(Ty), clearToFromMember(type)); 9663/// // Map members. 9664/// for (unsigned i = 0; i < size; i++) { 9665/// // For each component specified by this mapper: 9666/// for (auto c : begin[i]->all_components) { 9667/// if (c.hasMapper()) 9668/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9669/// c.arg_type, c.arg_name); 9670/// else 9671/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9672/// c.arg_begin, c.arg_size, c.arg_type, 9673/// c.arg_name); 9674/// } 9675/// } 9676/// // Delete the array section. 9677/// if (size > 1 && maptype.IsDelete) 9678/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9679/// size*sizeof(Ty), clearToFromMember(type)); 9680/// } 9681/// \endcode 9682void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9683 CodeGenFunction *CGF) { 9684 if (UDMMap.count(D) > 0) 9685 return; 9686 ASTContext &C = CGM.getContext(); 9687 QualType Ty = D->getType(); 9688 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9689 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9690 auto *MapperVarDecl = 9691 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9692 SourceLocation Loc = D->getLocation(); 9693 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9694 9695 // Prepare mapper function arguments and attributes. 9696 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9697 C.VoidPtrTy, ImplicitParamDecl::Other); 9698 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9699 ImplicitParamDecl::Other); 9700 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9701 C.VoidPtrTy, ImplicitParamDecl::Other); 9702 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9703 ImplicitParamDecl::Other); 9704 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9705 ImplicitParamDecl::Other); 9706 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9707 ImplicitParamDecl::Other); 9708 FunctionArgList Args; 9709 Args.push_back(&HandleArg); 9710 Args.push_back(&BaseArg); 9711 Args.push_back(&BeginArg); 9712 Args.push_back(&SizeArg); 9713 Args.push_back(&TypeArg); 9714 Args.push_back(&NameArg); 9715 const CGFunctionInfo &FnInfo = 9716 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9717 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9718 SmallString<64> TyStr; 9719 llvm::raw_svector_ostream Out(TyStr); 9720 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9721 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9722 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9723 Name, &CGM.getModule()); 9724 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9725 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9726 // Start the mapper function code generation. 
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Adjust the MEMBER_OF field of the map type to account for the
    // pre-existing components reported by the runtime.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
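    // For instance (illustrative only), if this mapper is invoked from
    // 'map(to: s)' (so MapType carries only the TO bit) and a member was
    // declared 'map(tofrom: ...)' in the mapper, the member decays to 'to':
    // its FROM bit is cleared by the branches emitted below.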
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
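  // (A rough sketch of the block layout emitted above: omp.arraymap.head ->
  // omp.arraymap.body, looped once per element with the map type selected in
  // the omp.type.* blocks, then omp.arraymap.exit -> omp.done.)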
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used
  // for memory allocation/deletion purposes only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
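  // (As a sketch, the call emitted below has the shape:
  //    __tgt_push_mapper_component(handle, base, begin, size * elem_size,
  //                                (map_type & ~(TO | FROM)) | IMPLICIT, name)
  //  so only allocation/deletion semantics survive in the map type.)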
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get the nested teams distribute directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
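      // (For example, a 'device(ancestor: 1)' clause on the target construct
      // reaches this path and simply runs the host-outlined version below.)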
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we choose not to use it so that the
    // compiler does not have to keep it alive and can therefore inline the
    // host function if that proves worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads, so no additional calls to the runtime are
    // required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that should
      // be passed to the runtime library - a 32-bit integer with the value
      // zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least one element of information for this
      // capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambda captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captured because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the condition in the if clause if, e.g., the user does
  // not specify any target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
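    // (In device codegen the offload entries table is seeded from the host's
    // offload metadata, so a missing entry means the host requested no entry
    // point for this region.)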
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
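  // (For example, for a hypothetical declare target global
  //    #pragma omp declare target
  //    S gv; // S has a user-defined constructor
  //    #pragma omp end declare target
  //  the ctor/dtor bodies are scanned below under their complete-variant
  //  mangled names.)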
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty),
                                     VarName,
                                     getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
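  // (E.g., 'declare target to(gv)' without unified shared memory registers
  // gv itself with the 'to' flag below, while 'declare target link(gv)'
  // registers a pointer-sized entry with the 'link' flag.)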
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temporary solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for variables with static "
                     "storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target; it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() && !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
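    // (For example, a TU that contains '#pragma omp requires
    // unified_shared_memory' and at least one target region registers
    // OMP_REQ_UNIFIED_SHARED_MEMORY with the runtime below.)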
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
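    // (Info.NumberOfPtrs and the arrays themselves are unchanged since the
    // begin call; this closing call reuses them.)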
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen =
      [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
       &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
        // Fill up the arrays with all the mapped variables.
        MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

        // Get map clause information.
        MappableExprsHandler MEHandler(D, CGF);
        MEHandler.generateAllInfo(CombinedInfo);

        TargetDataInfo Info;
        // Fill up the arrays and create the arguments.
        emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                             /*IsNonContiguous=*/true);
        bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
        emitOffloadingArraysArgument(
            CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
            Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
            {/*ForEndTask=*/false});
        InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
        InputInfo.BasePointersArray =
            Address(Info.BasePointersArray, CGM.getPointerAlign());
        InputInfo.PointersArray =
            Address(Info.PointersArray, CGM.getPointerAlign());
        InputInfo.SizesArray =
            Address(Info.SizesArray, CGM.getPointerAlign());
        InputInfo.MappersArray =
            Address(Info.MappersArray, CGM.getPointerAlign());
        MapTypesArray = Info.MapTypesArray;
        MapNamesArray = Info.MapNamesArray;
        if (RequiresOuterTask)
          CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
        else
          emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
      };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of that ISA for which the current vector version is generated.

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type"
  // (CDT) is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then
  //      the CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or
  //      class type which is pass-by-value (except for the type that maps
  //      to the built-in complex data type), the characteristic data type
  //      is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which the current vector version is
  // generated. The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1
  // "Registers and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
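
// Illustrative example (not part of the original source): given
//   #pragma omp declare simd
//   double foo(double x);
// with no simdlen clause, the CDT is double (64 bits), so for AVX2
// ('d', 256-bit registers) VLEN = 256 / 64 = 4 and both "_ZGVdN4v_foo"
// and "_ZGVdM4v_foo" are added as attributes (no [not]inbranch state was
// given); SSE ('b') similarly yields "_ZGVbN2v_foo" and "_ZGVbM2v_foo".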
11383/// 11384/// TODO: Need to implement the behavior for reference marked with a 11385/// var or no linear modifiers (1.b in the section). For this, we 11386/// need to extend ParamKindTy to support the linear modifiers. 11387static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11388 QT = QT.getCanonicalType(); 11389 11390 if (QT->isVoidType()) 11391 return false; 11392 11393 if (Kind == ParamKindTy::Uniform) 11394 return false; 11395 11396 if (Kind == ParamKindTy::Linear) 11397 return false; 11398 11399 // TODO: Handle linear references with modifiers 11400 11401 if (Kind == ParamKindTy::LinearWithVarStride) 11402 return false; 11403 11404 return true; 11405} 11406 11407/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11408static bool getAArch64PBV(QualType QT, ASTContext &C) { 11409 QT = QT.getCanonicalType(); 11410 unsigned Size = C.getTypeSize(QT); 11411 11412 // Only scalars and complex within 16 bytes wide set PVB to true. 11413 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11414 return false; 11415 11416 if (QT->isFloatingType()) 11417 return true; 11418 11419 if (QT->isIntegerType()) 11420 return true; 11421 11422 if (QT->isPointerType()) 11423 return true; 11424 11425 // TODO: Add support for complex types (section 3.1.2, item 2). 11426 11427 return false; 11428} 11429 11430/// Computes the lane size (LS) of a return type or of an input parameter, 11431/// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11432/// TODO: Add support for references, section 3.2.1, item 1. 11433static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11434 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11435 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11436 if (getAArch64PBV(PTy, C)) 11437 return C.getTypeSize(PTy); 11438 } 11439 if (getAArch64PBV(QT, C)) 11440 return C.getTypeSize(QT); 11441 11442 return C.getTypeSize(C.getUIntPtrType()); 11443} 11444 11445// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11446// signature of the scalar function, as defined in 3.2.2 of the 11447// AAVFABI. 11448static std::tuple<unsigned, unsigned, bool> 11449getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11450 QualType RetType = FD->getReturnType().getCanonicalType(); 11451 11452 ASTContext &C = FD->getASTContext(); 11453 11454 bool OutputBecomesInput = false; 11455 11456 llvm::SmallVector<unsigned, 8> Sizes; 11457 if (!RetType->isVoidType()) { 11458 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11459 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11460 OutputBecomesInput = true; 11461 } 11462 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11463 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11464 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11465 } 11466 11467 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11468 // The LS of a function parameter / return value can only be a power 11469 // of 2, starting from 8 bits, up to 128. 

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// the OpenMP classification of the parameters. The mangling function is
/// defined in section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
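
// Illustrative example (not part of the original source): for
//   double bar(double x);
// NDS = WDS = 64, so with no simdlen the not-inbranch Advanced SIMD
// ('n') variant is "_ZGVnN2v_bar" (two 64-bit lanes in a 128-bit
// vector); an NDS of 32 would instead generate both the 2-lane and the
// 4-lane names.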

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
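
// Illustrative example (not part of the original source): under SVE with
// no simdlen clause, only the masked variant is emitted and "x" is used
// as the VLEN token, e.g. "_ZGVsMxv_foo"; with simdlen(8) on a function
// whose WDS is 64, 8 * 64 = 512 bits is a multiple of 128 and below
// 2048, so "_ZGVsM8v_foo" passes the checks above.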

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
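
// Illustrative example (not part of the original source): for
//   #pragma omp declare simd linear(p : 2)
//   void baz(double *p);
// the step is rescaled above by sizeof(double), so StrideOrArg becomes
// 16 and the parameter is mangled as "l16" rather than "l2".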

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
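
// Illustrative example (not part of the original source): for
//   #pragma omp for ordered(2)
// emitDoacrossInit below initializes the runtime with num_dims = 2 and
// one kmp_dim entry per loop, while each
//   #pragma omp ordered depend(sink : i - 1, j)  /  depend(source)
// in the loop body maps to __kmpc_doacross_wait / __kmpc_doacross_post
// in emitDoacrossOrdered.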

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty =
      C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
be valid."); 11934 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11935 11936 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11937 if (Fn->doesNotThrow()) { 11938 CGF.EmitNounwindRuntimeCall(Fn, Args); 11939 return; 11940 } 11941 } 11942 CGF.EmitRuntimeCall(Callee, Args); 11943} 11944 11945void CGOpenMPRuntime::emitOutlinedFunctionCall( 11946 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11947 ArrayRef<llvm::Value *> Args) const { 11948 emitCall(CGF, Loc, OutlinedFn, Args); 11949} 11950 11951void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11952 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11953 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11954 HasEmittedDeclareTargetRegion = true; 11955} 11956 11957Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11958 const VarDecl *NativeParam, 11959 const VarDecl *TargetParam) const { 11960 return CGF.GetAddrOfLocalVar(NativeParam); 11961} 11962 11963Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11964 const VarDecl *VD) { 11965 if (!VD) 11966 return Address::invalid(); 11967 Address UntiedAddr = Address::invalid(); 11968 Address UntiedRealAddr = Address::invalid(); 11969 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11970 if (It != FunctionToUntiedTaskStackMap.end()) { 11971 const UntiedLocalVarsAddressesMap &UntiedData = 11972 UntiedLocalVarsStack[It->second]; 11973 auto I = UntiedData.find(VD); 11974 if (I != UntiedData.end()) { 11975 UntiedAddr = I->second.first; 11976 UntiedRealAddr = I->second.second; 11977 } 11978 } 11979 const VarDecl *CVD = VD->getCanonicalDecl(); 11980 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11981 // Use the default allocation. 11982 if (!isAllocatableDecl(VD)) 11983 return UntiedAddr; 11984 llvm::Value *Size; 11985 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11986 if (CVD->getType()->isVariablyModifiedType()) { 11987 Size = CGF.getTypeSize(CVD->getType()); 11988 // Align the size: ((size + align - 1) / align) * align 11989 Size = CGF.Builder.CreateNUWAdd( 11990 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11991 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11992 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11993 } else { 11994 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11995 Size = CGM.getSize(Sz.alignTo(Align)); 11996 } 11997 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11998 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11999 assert(AA->getAllocator() && 12000 "Expected allocator expression for non-default allocator."); 12001 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12002 // According to the standard, the original allocator type is a enum 12003 // (integer). Convert to pointer type, if required. 

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
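
// Illustrative example (not part of the original source): for
//   #pragma omp simd nontemporal(a)
// NontemporalDeclsRAII above pushes 'a' onto NontemporalDeclsStack, and
// isNontemporalDecl() lets the rest of codegen mark loads and stores of
// 'a' inside the region with nontemporal hints.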

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
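
// Illustrative example (not part of the original source): for
//   #pragma omp simd lastprivate(conditional : a)
//   for (int i = 0; i < n; ++i)
//     if (c[i])
//       a = i;
// the RAII below registers 'a' under a unique "pl_cond"-based name so
// that assignments to 'a' in the region can be tracked and the value
// from the lexically last updated iteration wins.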

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
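
// Illustrative example (not part of the original source): for a
// conditional lastprivate 'int a', emitLastprivateConditionalInit below
// wraps the private copy as
//   struct { int a; char Fired; };
// so inner regions can atomically set the Fired flag whenever 'a' is
// assigned.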

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
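// Unlike the other target hooks, emitTargetGlobal must not be unreachable:
// CodeGenModule queries it for globals it is about to emit. Returning false
// tells the caller that the SIMD-only runtime does not claim the declaration,
// so it is emitted as an ordinary host global.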
SIMD-only mode"); 12858} 12859 12860void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12861 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12862 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12863 llvm_unreachable("Not supported in SIMD-only mode"); 12864} 12865 12866void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12867 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12868 const Expr *Device) { 12869 llvm_unreachable("Not supported in SIMD-only mode"); 12870} 12871 12872void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12873 const OMPLoopDirective &D, 12874 ArrayRef<Expr *> NumIterations) { 12875 llvm_unreachable("Not supported in SIMD-only mode"); 12876} 12877 12878void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12879 const OMPDependClause *C) { 12880 llvm_unreachable("Not supported in SIMD-only mode"); 12881} 12882 12883const VarDecl * 12884CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12885 const VarDecl *NativeParam) const { 12886 llvm_unreachable("Not supported in SIMD-only mode"); 12887} 12888 12889Address 12890CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12891 const VarDecl *NativeParam, 12892 const VarDecl *TargetParam) const { 12893 llvm_unreachable("Not supported in SIMD-only mode"); 12894} 12895