1//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file implements the OpenMPIRBuilder class, which is used as a
11/// convenient way to create LLVM instructions for OpenMP directives.
12///
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
16
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/StringSwitch.h"
19#include "llvm/IR/CFG.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/MDBuilder.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/Error.h"
25#include "llvm/Transforms/Utils/BasicBlockUtils.h"
26#include "llvm/Transforms/Utils/CodeExtractor.h"
27
28#include <sstream>
29
30#define DEBUG_TYPE "openmp-ir-builder"
31
32using namespace llvm;
33using namespace omp;
34using namespace types;
35
36static cl::opt<bool>
37    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
38                         cl::desc("Use optimistic attributes describing "
39                                  "'as-if' properties of runtime calls."),
40                         cl::init(false));
41
42void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
43  LLVMContext &Ctx = Fn.getContext();
44
45#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
46#include "llvm/Frontend/OpenMP/OMPKinds.def"
47
48  // Add attributes to the new declaration.
49  switch (FnID) {
50#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
51  case Enum:                                                                   \
52    Fn.setAttributes(                                                          \
53        AttributeList::get(Ctx, FnAttrSet, RetAttrSet, ArgAttrSets));          \
54    break;
55#include "llvm/Frontend/OpenMP/OMPKinds.def"
56  default:
57    // Attributes are optional.
58    break;
59  }
60}
61
62Function *OpenMPIRBuilder::getOrCreateRuntimeFunction(RuntimeFunction FnID) {
63  Function *Fn = nullptr;
64
65  // Try to find the declation in the module first.
66  switch (FnID) {
67#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
68  case Enum:                                                                   \
69    Fn = M.getFunction(Str);                                                   \
70    break;
71#include "llvm/Frontend/OpenMP/OMPKinds.def"
72  }
73
74  if (!Fn) {
75    // Create a new declaration if we need one.
76    switch (FnID) {
77#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
78  case Enum:                                                                   \
79    Fn = Function::Create(FunctionType::get(ReturnType,                        \
80                                            ArrayRef<Type *>{__VA_ARGS__},     \
81                                            IsVarArg),                         \
82                          GlobalValue::ExternalLinkage, Str, M);               \
83    break;
84#include "llvm/Frontend/OpenMP/OMPKinds.def"
85    }
86
87    addAttributes(FnID, *Fn);
88  }
89
90  assert(Fn && "Failed to create OpenMP runtime function");
91  return Fn;
92}
93
94void OpenMPIRBuilder::initialize() { initializeTypes(M); }
95
96Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
97                                         IdentFlag LocFlags) {
98  // Enable "C-mode".
99  LocFlags |= OMP_IDENT_FLAG_KMPC;
100
101  GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}];
102  if (!DefaultIdent) {
103    Constant *I32Null = ConstantInt::getNullValue(Int32);
104    Constant *IdentData[] = {I32Null,
105                             ConstantInt::get(Int32, uint64_t(LocFlags)),
106                             I32Null, I32Null, SrcLocStr};
107    Constant *Initializer = ConstantStruct::get(
108        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
109
110    // Look for existing encoding of the location + flags, not needed but
111    // minimizes the difference to the existing solution while we transition.
112    for (GlobalVariable &GV : M.getGlobalList())
113      if (GV.getType() == IdentPtr && GV.hasInitializer())
114        if (GV.getInitializer() == Initializer)
115          return DefaultIdent = &GV;
116
117    DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(),
118                                      /* isConstant = */ false,
119                                      GlobalValue::PrivateLinkage, Initializer);
120    DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
121    DefaultIdent->setAlignment(Align(8));
122  }
123  return DefaultIdent;
124}
125
126Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
127  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
128  if (!SrcLocStr) {
129    Constant *Initializer =
130        ConstantDataArray::getString(M.getContext(), LocStr);
131
132    // Look for existing encoding of the location, not needed but minimizes the
133    // difference to the existing solution while we transition.
134    for (GlobalVariable &GV : M.getGlobalList())
135      if (GV.isConstant() && GV.hasInitializer() &&
136          GV.getInitializer() == Initializer)
137        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
138
139    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr);
140  }
141  return SrcLocStr;
142}
143
144Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
145  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
146}
147
148Constant *
149OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
150  DILocation *DIL = Loc.DL.get();
151  if (!DIL)
152    return getOrCreateDefaultSrcLocStr();
153  StringRef Filename =
154      !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
155  StringRef Function = DIL->getScope()->getSubprogram()->getName();
156  Function =
157      !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
158  std::string LineStr = std::to_string(DIL->getLine());
159  std::string ColumnStr = std::to_string(DIL->getColumn());
160  std::stringstream SrcLocStr;
161  SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";"
162            << LineStr << ";" << ColumnStr << ";;";
163  return getOrCreateSrcLocStr(SrcLocStr.str());
164}
165
166Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
167  return Builder.CreateCall(
168      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num), Ident,
169      "omp_global_thread_num");
170}
171
172OpenMPIRBuilder::InsertPointTy
173OpenMPIRBuilder::CreateBarrier(const LocationDescription &Loc, Directive DK,
174                               bool ForceSimpleCall, bool CheckCancelFlag) {
175  if (!updateToLocation(Loc))
176    return Loc.IP;
177  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
178}
179
180OpenMPIRBuilder::InsertPointTy
181OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
182                                 bool ForceSimpleCall, bool CheckCancelFlag) {
183  // Build call __kmpc_cancel_barrier(loc, thread_id) or
184  //            __kmpc_barrier(loc, thread_id);
185
186  IdentFlag BarrierLocFlags;
187  switch (Kind) {
188  case OMPD_for:
189    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
190    break;
191  case OMPD_sections:
192    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
193    break;
194  case OMPD_single:
195    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
196    break;
197  case OMPD_barrier:
198    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
199    break;
200  default:
201    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
202    break;
203  }
204
205  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
206  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
207                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
208
209  // If we are in a cancellable parallel region, barriers are cancellation
210  // points.
211  // TODO: Check why we would force simple calls or to ignore the cancel flag.
212  bool UseCancelBarrier =
213      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
214
215  Value *Result = Builder.CreateCall(
216      getOrCreateRuntimeFunction(UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
217                                                  : OMPRTL___kmpc_barrier),
218      Args);
219
220  if (UseCancelBarrier && CheckCancelFlag)
221    emitCancelationCheckImpl(Result, OMPD_parallel);
222
223  return Builder.saveIP();
224}
225
226OpenMPIRBuilder::InsertPointTy
227OpenMPIRBuilder::CreateCancel(const LocationDescription &Loc,
228                              Value *IfCondition,
229                              omp::Directive CanceledDirective) {
230  if (!updateToLocation(Loc))
231    return Loc.IP;
232
233  // LLVM utilities like blocks with terminators.
234  auto *UI = Builder.CreateUnreachable();
235
236  Instruction *ThenTI = UI, *ElseTI = nullptr;
237  if (IfCondition)
238    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
239  Builder.SetInsertPoint(ThenTI);
240
241  Value *CancelKind = nullptr;
242  switch (CanceledDirective) {
243#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
244  case DirectiveEnum:                                                          \
245    CancelKind = Builder.getInt32(Value);                                      \
246    break;
247#include "llvm/Frontend/OpenMP/OMPKinds.def"
248  default:
249    llvm_unreachable("Unknown cancel kind!");
250  }
251
252  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
253  Value *Ident = getOrCreateIdent(SrcLocStr);
254  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
255  Value *Result = Builder.CreateCall(
256      getOrCreateRuntimeFunction(OMPRTL___kmpc_cancel), Args);
257
258  // The actual cancel logic is shared with others, e.g., cancel_barriers.
259  emitCancelationCheckImpl(Result, CanceledDirective);
260
261  // Update the insertion point and remove the terminator we introduced.
262  Builder.SetInsertPoint(UI->getParent());
263  UI->eraseFromParent();
264
265  return Builder.saveIP();
266}
267
268void OpenMPIRBuilder::emitCancelationCheckImpl(
269    Value *CancelFlag, omp::Directive CanceledDirective) {
270  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
271         "Unexpected cancellation!");
272
273  // For a cancel barrier we create two new blocks.
274  BasicBlock *BB = Builder.GetInsertBlock();
275  BasicBlock *NonCancellationBlock;
276  if (Builder.GetInsertPoint() == BB->end()) {
277    // TODO: This branch will not be needed once we moved to the
278    // OpenMPIRBuilder codegen completely.
279    NonCancellationBlock = BasicBlock::Create(
280        BB->getContext(), BB->getName() + ".cont", BB->getParent());
281  } else {
282    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
283    BB->getTerminator()->eraseFromParent();
284    Builder.SetInsertPoint(BB);
285  }
286  BasicBlock *CancellationBlock = BasicBlock::Create(
287      BB->getContext(), BB->getName() + ".cncl", BB->getParent());
288
289  // Jump to them based on the return value.
290  Value *Cmp = Builder.CreateIsNull(CancelFlag);
291  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
292                       /* TODO weight */ nullptr, nullptr);
293
294  // From the cancellation block we finalize all variables and go to the
295  // post finalization block that is known to the FiniCB callback.
296  Builder.SetInsertPoint(CancellationBlock);
297  auto &FI = FinalizationStack.back();
298  FI.FiniCB(Builder.saveIP());
299
300  // The continuation block is where code generation continues.
301  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
302}
303
304IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
305    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
306    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
307    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
308  if (!updateToLocation(Loc))
309    return Loc.IP;
310
311  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
312  Value *Ident = getOrCreateIdent(SrcLocStr);
313  Value *ThreadID = getOrCreateThreadID(Ident);
314
315  if (NumThreads) {
316    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
317    Value *Args[] = {
318        Ident, ThreadID,
319        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
320    Builder.CreateCall(
321        getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
322  }
323
324  if (ProcBind != OMP_PROC_BIND_default) {
325    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
326    Value *Args[] = {
327        Ident, ThreadID,
328        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
329    Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
330                       Args);
331  }
332
333  BasicBlock *InsertBB = Builder.GetInsertBlock();
334  Function *OuterFn = InsertBB->getParent();
335
336  // Vector to remember instructions we used only during the modeling but which
337  // we want to delete at the end.
338  SmallVector<Instruction *, 4> ToBeDeleted;
339
340  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
341  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
342  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");
343
344  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
345  // program, otherwise we only need them for modeling purposes to get the
346  // associated arguments in the outlined function. In the former case,
347  // initialize the allocas properly, in the latter case, delete them later.
348  if (IfCondition) {
349    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
350    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
351  } else {
352    ToBeDeleted.push_back(TIDAddr);
353    ToBeDeleted.push_back(ZeroAddr);
354  }
355
356  // Create an artificial insertion point that will also ensure the blocks we
357  // are about to split are not degenerated.
358  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
359
360  Instruction *ThenTI = UI, *ElseTI = nullptr;
361  if (IfCondition)
362    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
363
364  BasicBlock *ThenBB = ThenTI->getParent();
365  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
366  BasicBlock *PRegBodyBB =
367      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
368  BasicBlock *PRegPreFiniBB =
369      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
370  BasicBlock *PRegExitBB =
371      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");
372
373  auto FiniCBWrapper = [&](InsertPointTy IP) {
374    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
375    // target to the region exit block.
376    if (IP.getBlock()->end() == IP.getPoint()) {
377      IRBuilder<>::InsertPointGuard IPG(Builder);
378      Builder.restoreIP(IP);
379      Instruction *I = Builder.CreateBr(PRegExitBB);
380      IP = InsertPointTy(I->getParent(), I->getIterator());
381    }
382    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
383           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
384           "Unexpected insertion point for finalization call!");
385    return FiniCB(IP);
386  };
387
388  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
389
390  // Generate the privatization allocas in the block that will become the entry
391  // of the outlined function.
392  InsertPointTy AllocaIP(PRegEntryBB,
393                         PRegEntryBB->getTerminator()->getIterator());
394  Builder.restoreIP(AllocaIP);
395  AllocaInst *PrivTIDAddr =
396      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
397  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");
398
399  // Add some fake uses for OpenMP provided arguments.
400  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
401  ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));
402
403  // ThenBB
404  //   |
405  //   V
406  // PRegionEntryBB         <- Privatization allocas are placed here.
407  //   |
408  //   V
409  // PRegionBodyBB          <- BodeGen is invoked here.
410  //   |
411  //   V
412  // PRegPreFiniBB          <- The block we will start finalization from.
413  //   |
414  //   V
415  // PRegionExitBB          <- A common exit to simplify block collection.
416  //
417
418  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");
419
420  // Let the caller create the body.
421  assert(BodyGenCB && "Expected body generation callback!");
422  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
423  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);
424
425  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");
426
427  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
428  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
429  ParallelRegionBlockSet.insert(PRegEntryBB);
430  ParallelRegionBlockSet.insert(PRegExitBB);
431
432  // Collect all blocks in-between PRegEntryBB and PRegExitBB.
433  Worklist.push_back(PRegEntryBB);
434  while (!Worklist.empty()) {
435    BasicBlock *BB = Worklist.pop_back_val();
436    ParallelRegionBlocks.push_back(BB);
437    for (BasicBlock *SuccBB : successors(BB))
438      if (ParallelRegionBlockSet.insert(SuccBB).second)
439        Worklist.push_back(SuccBB);
440  }
441
442  CodeExtractorAnalysisCache CEAC(*OuterFn);
443  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
444                          /* AggregateArgs */ false,
445                          /* BlockFrequencyInfo */ nullptr,
446                          /* BranchProbabilityInfo */ nullptr,
447                          /* AssumptionCache */ nullptr,
448                          /* AllowVarArgs */ true,
449                          /* AllowAlloca */ true,
450                          /* Suffix */ ".omp_par");
451
452  // Find inputs to, outputs from the code region.
453  BasicBlock *CommonExit = nullptr;
454  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
455  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
456  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
457
458  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");
459
460  FunctionCallee TIDRTLFn =
461      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);
462
463  auto PrivHelper = [&](Value &V) {
464    if (&V == TIDAddr || &V == ZeroAddr)
465      return;
466
467    SmallVector<Use *, 8> Uses;
468    for (Use &U : V.uses())
469      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
470        if (ParallelRegionBlockSet.count(UserI->getParent()))
471          Uses.push_back(&U);
472
473    Value *ReplacementValue = nullptr;
474    CallInst *CI = dyn_cast<CallInst>(&V);
475    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
476      ReplacementValue = PrivTID;
477    } else {
478      Builder.restoreIP(
479          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
480      assert(ReplacementValue &&
481             "Expected copy/create callback to set replacement value!");
482      if (ReplacementValue == &V)
483        return;
484    }
485
486    for (Use *UPtr : Uses)
487      UPtr->set(ReplacementValue);
488  };
489
490  for (Value *Input : Inputs) {
491    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
492    PrivHelper(*Input);
493  }
494  for (Value *Output : Outputs) {
495    LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
496    PrivHelper(*Output);
497  }
498
499  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
500  LLVM_DEBUG({
501    for (auto *BB : ParallelRegionBlocks)
502      dbgs() << " PBR: " << BB->getName() << "\n";
503  });
504
505  // Add some known attributes to the outlined function.
506  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
507  OutlinedFn->addParamAttr(0, Attribute::NoAlias);
508  OutlinedFn->addParamAttr(1, Attribute::NoAlias);
509  OutlinedFn->addFnAttr(Attribute::NoUnwind);
510  OutlinedFn->addFnAttr(Attribute::NoRecurse);
511
512  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
513  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
514
515  // For compability with the clang CG we move the outlined function after the
516  // one with the parallel region.
517  OutlinedFn->removeFromParent();
518  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
519
520  // Remove the artificial entry introduced by the extractor right away, we
521  // made our own entry block after all.
522  {
523    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
524    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
525    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
526    PRegEntryBB->moveBefore(&ArtificialEntry);
527    ArtificialEntry.eraseFromParent();
528  }
529  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
530  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);
531
532  assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
533  assert(OutlinedFn->arg_size() >= 2 &&
534         "Expected at least tid and bounded tid as arguments");
535  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;
536
537  CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
538  CI->getParent()->setName("omp_parallel");
539  Builder.SetInsertPoint(CI);
540
541  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
542  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
543                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};
544
545  SmallVector<Value *, 16> RealArgs;
546  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
547  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
548
549  FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
550  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
551    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
552      llvm::LLVMContext &Ctx = F->getContext();
553      MDBuilder MDB(Ctx);
554      // Annotate the callback behavior of the __kmpc_fork_call:
555      //  - The callback callee is argument number 2 (microtask).
556      //  - The first two arguments of the callback callee are unknown (-1).
557      //  - All variadic arguments to the __kmpc_fork_call are passed to the
558      //    callback callee.
559      F->addMetadata(
560          llvm::LLVMContext::MD_callback,
561          *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
562                                      2, {-1, -1},
563                                      /* VarArgsArePassed */ true)}));
564    }
565  }
566
567  Builder.CreateCall(RTLFn, RealArgs);
568
569  LLVM_DEBUG(dbgs() << "With fork_call placed: "
570                    << *Builder.GetInsertBlock()->getParent() << "\n");
571
572  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
573  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
574  UI->eraseFromParent();
575
576  // Initialize the local TID stack location with the argument value.
577  Builder.SetInsertPoint(PrivTID);
578  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
579  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);
580
581  // If no "if" clause was present we do not need the call created during
582  // outlining, otherwise we reuse it in the serialized parallel region.
583  if (!ElseTI) {
584    CI->eraseFromParent();
585  } else {
586
587    // If an "if" clause was present we are now generating the serialized
588    // version into the "else" branch.
589    Builder.SetInsertPoint(ElseTI);
590
591    // Build calls __kmpc_serialized_parallel(&Ident, GTid);
592    Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
593    Builder.CreateCall(
594        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
595        SerializedParallelCallArgs);
596
597    // OutlinedFn(&GTid, &zero, CapturedStruct);
598    CI->removeFromParent();
599    Builder.Insert(CI);
600
601    // __kmpc_end_serialized_parallel(&Ident, GTid);
602    Value *EndArgs[] = {Ident, ThreadID};
603    Builder.CreateCall(
604        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
605        EndArgs);
606
607    LLVM_DEBUG(dbgs() << "With serialized parallel region: "
608                      << *Builder.GetInsertBlock()->getParent() << "\n");
609  }
610
611  // Adjust the finalization stack, verify the adjustment, and call the
612  // finalize function a last time to finalize values between the pre-fini block
613  // and the exit block if we left the parallel "the normal way".
614  auto FiniInfo = FinalizationStack.pop_back_val();
615  (void)FiniInfo;
616  assert(FiniInfo.DK == OMPD_parallel &&
617         "Unexpected finalization stack state!");
618
619  Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
620  assert(PreFiniTI->getNumSuccessors() == 1 &&
621         PreFiniTI->getSuccessor(0)->size() == 1 &&
622         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
623         "Unexpected CFG structure!");
624
625  InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
626  FiniCB(PreFiniIP);
627
628  for (Instruction *I : ToBeDeleted)
629    I->eraseFromParent();
630
631  return AfterIP;
632}
633