//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements function importing based on function summaries.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/FunctionImport.h"
15
16#include "llvm/ADT/StringSet.h"
17#include "llvm/IR/AutoUpgrade.h"
18#include "llvm/IR/DiagnosticPrinter.h"
19#include "llvm/IR/IntrinsicInst.h"
20#include "llvm/IR/Module.h"
21#include "llvm/IRReader/IRReader.h"
22#include "llvm/Linker/Linker.h"
23#include "llvm/Object/FunctionIndexObjectFile.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/SourceMgr.h"
27
28#include <map>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "function-import"
33
34/// Limit on instruction count of imported functions.
35static cl::opt<unsigned> ImportInstrLimit(
36    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
37    cl::desc("Only import functions with less than N instructions"));
38
39// Load lazily a module from \p FileName in \p Context.
40static std::unique_ptr<Module> loadFile(const std::string &FileName,
41                                        LLVMContext &Context) {
42  SMDiagnostic Err;
43  DEBUG(dbgs() << "Loading '" << FileName << "'\n");
44  // Metadata isn't loaded or linked until after all functions are
45  // imported, after which it will be materialized and linked.
46  std::unique_ptr<Module> Result =
47      getLazyIRFileModule(FileName, Err, Context,
48                          /* ShouldLazyLoadMetadata = */ true);
49  if (!Result) {
50    Err.print("function-import", errs());
51    return nullptr;
52  }
53
54  return Result;
55}
56
57namespace {
58/// Helper to load on demand a Module from file and cache it for subsequent
59/// queries. It can be used with the FunctionImporter.
60class ModuleLazyLoaderCache {
61  /// Cache of lazily loaded module for import.
62  StringMap<std::unique_ptr<Module>> ModuleMap;
63
64  /// Retrieve a Module from the cache or lazily load it on demand.
65  std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
66
67public:
68  /// Create the loader, Module will be initialized in \p Context.
69  ModuleLazyLoaderCache(std::function<
70      std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
71      : createLazyModule(createLazyModule) {}
72
73  /// Retrieve a Module from the cache or lazily load it on demand.
74  Module &operator()(StringRef FileName);
75
76  std::unique_ptr<Module> takeModule(StringRef FileName) {
77    auto I = ModuleMap.find(FileName);
78    assert(I != ModuleMap.end());
79    std::unique_ptr<Module> Ret = std::move(I->second);
80    ModuleMap.erase(I);
81    return Ret;
82  }
83};
84
85// Get a Module for \p FileName from the cache, or load it lazily.
86Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
87  auto &Module = ModuleMap[Identifier];
88  if (!Module)
89    Module = createLazyModule(Identifier);
90  return *Module;
91}
92} // anonymous namespace
93
94/// Walk through the instructions in \p F looking for external
95/// calls not already in the \p CalledFunctions set. If any are
96/// found they are added to the \p Worklist for importing.
97static void findExternalCalls(const Module &DestModule, Function &F,
98                              const FunctionInfoIndex &Index,
99                              StringSet<> &CalledFunctions,
100                              SmallVector<StringRef, 64> &Worklist) {
101  // We need to suffix internal function calls imported from other modules,
102  // prepare the suffix ahead of time.
103  std::string Suffix;
104  if (F.getParent() != &DestModule)
105    Suffix =
106        (Twine(".llvm.") +
107         Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
108
109  for (auto &BB : F) {
110    for (auto &I : BB) {
111      if (isa<CallInst>(I)) {
112        auto CalledFunction = cast<CallInst>(I).getCalledFunction();
113        // Insert any new external calls that have not already been
114        // added to set/worklist.
115        if (!CalledFunction || !CalledFunction->hasName())
116          continue;
117        // Ignore intrinsics early
118        if (CalledFunction->isIntrinsic()) {
119          assert(CalledFunction->getIntrinsicID() != 0);
120          continue;
121        }
122        auto ImportedName = CalledFunction->getName();
123        auto Renamed = (ImportedName + Suffix).str();
124        // Rename internal functions
125        if (CalledFunction->hasInternalLinkage()) {
126          ImportedName = Renamed;
127        }
128        auto It = CalledFunctions.insert(ImportedName);
129        if (!It.second) {
130          // This is a call to a function we already considered, skip.
131          continue;
132        }
133        // Ignore functions already present in the destination module
134        auto *SrcGV = DestModule.getNamedValue(ImportedName);
135        if (SrcGV) {
136          if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
137            SrcGV = SGA->getBaseObject();
138          assert(isa<Function>(SrcGV) && "Name collision during import");
139          if (!cast<Function>(SrcGV)->isDeclaration()) {
140            DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
141                         << ImportedName << " already in DestinationModule\n");
142            continue;
143          }
144        }
145
146        Worklist.push_back(It.first->getKey());
147        DEBUG(dbgs() << DestModule.getModuleIdentifier()
148                     << ": Adding callee for : " << ImportedName << " : "
149                     << F.getName() << "\n");
150      }
151    }
152  }
153}
154
155// Helper function: given a worklist and an index, will process all the worklist
156// and decide what to import based on the summary information.
157//
158// Nothing is actually imported, functions are materialized in their source
159// module and analyzed there.
160//
161// \p ModuleToFunctionsToImportMap is filled with the set of Function to import
162// per Module.
163static void GetImportList(Module &DestModule,
164                          SmallVector<StringRef, 64> &Worklist,
165                          StringSet<> &CalledFunctions,
166                          std::map<StringRef, DenseSet<const GlobalValue *>>
167                              &ModuleToFunctionsToImportMap,
168                          const FunctionInfoIndex &Index,
169                          ModuleLazyLoaderCache &ModuleLoaderCache) {
170  while (!Worklist.empty()) {
171    auto CalledFunctionName = Worklist.pop_back_val();
172    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
173                 << CalledFunctionName << "\n");
174
175    // Try to get a summary for this function call.
176    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
177    if (InfoList == Index.end()) {
178      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
179                   << CalledFunctionName << " Ignoring.\n");
180      continue;
181    }
182    assert(!InfoList->second.empty() && "No summary, error at import?");
183
184    // Comdat can have multiple entries, FIXME: what do we do with them?
185    auto &Info = InfoList->second[0];
186    assert(Info && "Nullptr in list, error importing summaries?\n");
187
188    auto *Summary = Info->functionSummary();
189    if (!Summary) {
190      // FIXME: in case we are lazyloading summaries, we can do it now.
191      DEBUG(dbgs() << DestModule.getModuleIdentifier()
192                   << ": Missing summary for  " << CalledFunctionName
193                   << ", error at import?\n");
194      llvm_unreachable("Missing summary");
195    }
196
197    if (Summary->instCount() > ImportInstrLimit) {
198      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
199                   << CalledFunctionName << " with " << Summary->instCount()
200                   << " instructions (limit " << ImportInstrLimit << ")\n");
201      continue;
202    }
203
204    // Get the module path from the summary.
205    auto ModuleIdentifier = Summary->modulePath();
206    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
207                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
208
209    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
210
211    // The function that we will import!
212    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
213
214    if (!SGV) {
215      // The destination module is referencing function using their renamed name
216      // when importing a function that was originally local in the source
217      // module. The source module we have might not have been renamed so we try
218      // to remove the suffix added during the renaming to recover the original
219      // name in the source module.
220      std::pair<StringRef, StringRef> Split =
221          CalledFunctionName.split(".llvm.");
222      SGV = SrcModule.getNamedValue(Split.first);
223      assert(SGV && "Can't find function to import in source module");
224    }
225    if (!SGV) {
226      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
227                         "' in Module '" + SrcModule.getModuleIdentifier() +
228                         "', error in the summary?\n");
229    }
230
231    Function *F = dyn_cast<Function>(SGV);
232    if (!F && isa<GlobalAlias>(SGV)) {
233      auto *SGA = dyn_cast<GlobalAlias>(SGV);
234      F = dyn_cast<Function>(SGA->getBaseObject());
235      CalledFunctionName = F->getName();
236    }
237    assert(F && "Imported Function is ... not a Function");
238
239    // We cannot import weak_any functions/aliases without possibly affecting
240    // the order they are seen and selected by the linker, changing program
241    // semantics.
242    if (SGV->hasWeakAnyLinkage()) {
243      DEBUG(dbgs() << DestModule.getModuleIdentifier()
244                   << ": Ignoring import request for weak-any "
245                   << (isa<Function>(SGV) ? "function " : "alias ")
246                   << CalledFunctionName << " from "
247                   << SrcModule.getModuleIdentifier() << "\n");
248      continue;
249    }
250
251    // Add the function to the import list
252    auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
253    Entry.insert(F);
254
255    // Process the newly imported functions and add callees to the worklist.
256    F->materialize();
257    findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
258  }
259}
260
261// Automatically import functions in Module \p DestModule based on the summaries
262// index.
263//
264// The current implementation imports every called functions that exists in the
265// summaries index.
266bool FunctionImporter::importFunctions(Module &DestModule) {
267  DEBUG(dbgs() << "Starting import for Module "
268               << DestModule.getModuleIdentifier() << "\n");
269  unsigned ImportedCount = 0;
270
271  /// First step is collecting the called external functions.
272  StringSet<> CalledFunctions;
273  SmallVector<StringRef, 64> Worklist;
274  for (auto &F : DestModule) {
275    if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
276      continue;
277    findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
278  }
279  if (Worklist.empty())
280    return false;
281
282  /// Second step: for every call to an external function, try to import it.
283
284  // Linker that will be used for importing function
285  Linker TheLinker(DestModule);
286
287  // Map of Module -> List of Function to import from the Module
288  std::map<StringRef, DenseSet<const GlobalValue *>>
289      ModuleToFunctionsToImportMap;
290
291  // Analyze the summaries and get the list of functions to import by
292  // populating ModuleToFunctionsToImportMap
293  ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
294  GetImportList(DestModule, Worklist, CalledFunctions,
295                ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
296  assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
297
298  StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
299      ModuleToTempMDValsMap;
300
301  // Do the actual import of functions now, one Module at a time
302  for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
303    // Get the module for the import
304    auto &FunctionsToImport = FunctionsToImportPerModule.second;
305    std::unique_ptr<Module> SrcModule =
306        ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
307    assert(&DestModule.getContext() == &SrcModule->getContext() &&
308           "Context mismatch");
309
310    // Save the mapping of value ids to temporary metadata created when
311    // importing this function. If we have already imported from this module,
312    // add new temporary metadata to the existing mapping.
313    auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
314    if (!TempMDVals)
315      TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
316
317    // Link in the specified functions.
318    if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
319                               &Index, &FunctionsToImport, TempMDVals.get()))
320      report_fatal_error("Function Import: link error");
321
322    ImportedCount += FunctionsToImport.size();
323  }
324
325  // Now link in metadata for all modules from which we imported functions.
326  for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
327       ModuleToTempMDValsMap) {
328    // Load the specified source module.
329    auto &SrcModule = ModuleLoaderCache(SME.getKey());
330    // The modules were created with lazy metadata loading. Materialize it
331    // now, before linking it.
332    SrcModule.materializeMetadata();
333    UpgradeDebugInfo(SrcModule);
334
335    // Link in all necessary metadata from this module.
336    if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
337      return false;
338  }
339
340  DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
341               << DestModule.getModuleIdentifier() << "\n");
342  return ImportedCount;
343}
344
345/// Summary file to use for function importing when using -function-import from
346/// the command line.
347static cl::opt<std::string>
348    SummaryFile("summary-file",
349                cl::desc("The summary file to use for function importing."));
350
351static void diagnosticHandler(const DiagnosticInfo &DI) {
352  raw_ostream &OS = errs();
353  DiagnosticPrinterRawOStream DP(OS);
354  DI.print(DP);
355  OS << '\n';
356}
357
358/// Parse the function index out of an IR file and return the function
359/// index object if found, or nullptr if not.
360static std::unique_ptr<FunctionInfoIndex>
361getFunctionIndexForFile(StringRef Path, std::string &Error,
362                        DiagnosticHandlerFunction DiagnosticHandler) {
363  std::unique_ptr<MemoryBuffer> Buffer;
364  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
365      MemoryBuffer::getFile(Path);
366  if (std::error_code EC = BufferOrErr.getError()) {
367    Error = EC.message();
368    return nullptr;
369  }
370  Buffer = std::move(BufferOrErr.get());
371  ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
372      object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
373                                              DiagnosticHandler);
374  if (std::error_code EC = ObjOrErr.getError()) {
375    Error = EC.message();
376    return nullptr;
377  }
378  return (*ObjOrErr)->takeIndex();
379}
380
381namespace {
382/// Pass that performs cross-module function import provided a summary file.
383class FunctionImportPass : public ModulePass {
384  /// Optional function summary index to use for importing, otherwise
385  /// the summary-file option must be specified.
386  const FunctionInfoIndex *Index;
387
388public:
389  /// Pass identification, replacement for typeid
390  static char ID;
391
392  /// Specify pass name for debug output
393  const char *getPassName() const override {
394    return "Function Importing";
395  }
396
397  explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
398      : ModulePass(ID), Index(Index) {}
399
400  bool runOnModule(Module &M) override {
401    if (SummaryFile.empty() && !Index)
402      report_fatal_error("error: -function-import requires -summary-file or "
403                         "file from frontend\n");
404    std::unique_ptr<FunctionInfoIndex> IndexPtr;
405    if (!SummaryFile.empty()) {
406      if (Index)
407        report_fatal_error("error: -summary-file and index from frontend\n");
408      std::string Error;
409      IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
410      if (!IndexPtr) {
411        errs() << "Error loading file '" << SummaryFile << "': " << Error
412               << "\n";
413        return false;
414      }
415      Index = IndexPtr.get();
416    }
417
418    // First we need to promote to global scope and rename any local values that
419    // are potentially exported to other modules.
420    if (renameModuleForThinLTO(M, Index)) {
421      errs() << "Error renaming module\n";
422      return false;
423    }
424
425    // Perform the import now.
426    auto ModuleLoader = [&M](StringRef Identifier) {
427      return loadFile(Identifier, M.getContext());
428    };
429    FunctionImporter Importer(*Index, ModuleLoader);
430    return Importer.importFunctions(M);
431  }
432};
433} // anonymous namespace
434
435char FunctionImportPass::ID = 0;
436INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
437                      "Summary Based Function Import", false, false)
438INITIALIZE_PASS_END(FunctionImportPass, "function-import",
439                    "Summary Based Function Import", false, false)
440
441namespace llvm {
442Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
443  return new FunctionImportPass(Index);
444}
445}
446