ThinLTOCodeGenerator.h revision 314564
1//===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file declares the ThinLTOCodeGenerator class, similar to the
// LTOCodeGenerator but for the ThinLTO scheme. It provides an interface for
// linker plugins.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_LTO_THINLTOCODEGENERATOR_H
17#define LLVM_LTO_THINLTOCODEGENERATOR_H
18
19#include "llvm-c/lto.h"
20#include "llvm/ADT/StringSet.h"
21#include "llvm/ADT/Triple.h"
22#include "llvm/IR/ModuleSummaryIndex.h"
23#include "llvm/Support/CodeGen.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Target/TargetOptions.h"
26
27#include <string>
28
29namespace llvm {
30class StringRef;
31class LLVMContext;
32class TargetMachine;
33
34/// Wrapper around MemoryBufferRef, owning the identifier
35class ThinLTOBuffer {
36  std::string OwnedIdentifier;
37  StringRef Buffer;
38
39public:
40  ThinLTOBuffer(StringRef Buffer, StringRef Identifier)
41      : OwnedIdentifier(Identifier), Buffer(Buffer) {}
42
43  MemoryBufferRef getMemBuffer() const {
44    return MemoryBufferRef(Buffer,
45                           {OwnedIdentifier.c_str(), OwnedIdentifier.size()});
46  }
47  StringRef getBuffer() const { return Buffer; }
48  StringRef getBufferIdentifier() const { return OwnedIdentifier; }
49};
50
51/// Helper to gather options relevant to the target machine creation
52struct TargetMachineBuilder {
53  Triple TheTriple;
54  std::string MCpu;
55  std::string MAttr;
56  TargetOptions Options;
57  Optional<Reloc::Model> RelocModel;
58  CodeGenOpt::Level CGOptLevel = CodeGenOpt::Aggressive;
59
60  std::unique_ptr<TargetMachine> create() const;
61};
62
63/// This class define an interface similar to the LTOCodeGenerator, but adapted
64/// for ThinLTO processing.
65/// The ThinLTOCodeGenerator is not intended to be reuse for multiple
66/// compilation: the model is that the client adds modules to the generator and
67/// ask to perform the ThinLTO optimizations / codegen, and finally destroys the
68/// codegenerator.
69class ThinLTOCodeGenerator {
70public:
71  /// Add given module to the code generator.
72  void addModule(StringRef Identifier, StringRef Data);
73
74  /**
75   * Adds to a list of all global symbols that must exist in the final generated
76   * code. If a symbol is not listed there, it will be optimized away if it is
77   * inlined into every usage.
78   */
79  void preserveSymbol(StringRef Name);
80
81  /**
82   * Adds to a list of all global symbols that are cross-referenced between
83   * ThinLTO files. If the ThinLTO CodeGenerator can ensure that every
84   * references from a ThinLTO module to this symbol is optimized away, then
85   * the symbol can be discarded.
86   */
87  void crossReferenceSymbol(StringRef Name);
88
89  /**
90   * Process all the modules that were added to the code generator in parallel.
91   *
92   * Client can access the resulting object files using getProducedBinaries(),
93   * unless setGeneratedObjectsDirectory() has been called, in which case
94   * results are available through getProducedBinaryFiles().
95   */
96  void run();
97
98  /**
99   * Return the "in memory" binaries produced by the code generator. This is
100   * filled after run() unless setGeneratedObjectsDirectory() has been
101   * called, in which case results are available through
102   * getProducedBinaryFiles().
103   */
104  std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() {
105    return ProducedBinaries;
106  }
107
108  /**
109   * Return the "on-disk" binaries produced by the code generator. This is
110   * filled after run() when setGeneratedObjectsDirectory() has been
111   * called, in which case results are available through getProducedBinaries().
112   */
113  std::vector<std::string> &getProducedBinaryFiles() {
114    return ProducedBinaryFiles;
115  }
116
117  /**
118   * \defgroup Options setters
119   * @{
120   */
121
122  /**
123   * \defgroup Cache controlling options
124   *
125   * These entry points control the ThinLTO cache. The cache is intended to
126   * support incremental build, and thus needs to be persistent accross build.
127   * The client enabled the cache by supplying a path to an existing directory.
128   * The code generator will use this to store objects files that may be reused
129   * during a subsequent build.
130   * To avoid filling the disk space, a few knobs are provided:
131   *  - The pruning interval limit the frequency at which the garbage collector
132   *    will try to scan the cache directory to prune it from expired entries.
133   *    Setting to -1 disable the pruning (default).
134   *  - The pruning expiration time indicates to the garbage collector how old
135   *    an entry needs to be to be removed.
136   *  - Finally, the garbage collector can be instructed to prune the cache till
137   *    the occupied space goes below a threshold.
138   * @{
139   */
140
141  struct CachingOptions {
142    std::string Path;                    // Path to the cache, empty to disable.
143    int PruningInterval = 1200;          // seconds, -1 to disable pruning.
144    unsigned int Expiration = 7 * 24 * 3600;     // seconds (1w default).
145    unsigned MaxPercentageOfAvailableSpace = 75; // percentage.
146  };
147
148  /// Provide a path to a directory where to store the cached files for
149  /// incremental build.
150  void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); }
151
152  /// Cache policy: interval (seconds) between two prune of the cache. Set to a
153  /// negative value (default) to disable pruning. A value of 0 will be ignored.
154  void setCachePruningInterval(int Interval) {
155    if (Interval)
156      CacheOptions.PruningInterval = Interval;
157  }
158
159  /// Cache policy: expiration (in seconds) for an entry.
160  /// A value of 0 will be ignored.
161  void setCacheEntryExpiration(unsigned Expiration) {
162    if (Expiration)
163      CacheOptions.Expiration = Expiration;
164  }
165
166  /**
167   * Sets the maximum cache size that can be persistent across build, in terms
168   * of percentage of the available space on the the disk. Set to 100 to
169   * indicate no limit, 50 to indicate that the cache size will not be left over
170   * half the available space. A value over 100 will be reduced to 100, and a
171   * value of 0 will be ignored.
172   *
173   *
174   * The formula looks like:
175   *  AvailableSpace = FreeSpace + ExistingCacheSize
176   *  NewCacheSize = AvailableSpace * P/100
177   *
178   */
179  void setMaxCacheSizeRelativeToAvailableSpace(unsigned Percentage) {
180    if (Percentage)
181      CacheOptions.MaxPercentageOfAvailableSpace = Percentage;
182  }
183
184  /**@}*/
185
186  /// Set the path to a directory where to save temporaries at various stages of
187  /// the processing.
188  void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); }
189
190  /// Set the path to a directory where to save generated object files. This
191  /// path can be used by a linker to request on-disk files instead of in-memory
192  /// buffers. When set, results are available through getProducedBinaryFiles()
193  /// instead of getProducedBinaries().
194  void setGeneratedObjectsDirectory(std::string Path) {
195    SavedObjectsDirectoryPath = std::move(Path);
196  }
197
198  /// CPU to use to initialize the TargetMachine
199  void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); }
200
201  /// Subtarget attributes
202  void setAttr(std::string MAttr) { TMBuilder.MAttr = std::move(MAttr); }
203
204  /// TargetMachine options
205  void setTargetOptions(TargetOptions Options) {
206    TMBuilder.Options = std::move(Options);
207  }
208
209  /// CodeModel
210  void setCodePICModel(Optional<Reloc::Model> Model) {
211    TMBuilder.RelocModel = Model;
212  }
213
214  /// CodeGen optimization level
215  void setCodeGenOptLevel(CodeGenOpt::Level CGOptLevel) {
216    TMBuilder.CGOptLevel = CGOptLevel;
217  }
218
219  /// IR optimization level: from 0 to 3.
220  void setOptLevel(unsigned NewOptLevel) {
221    OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel;
222  }
223
224  /// Disable CodeGen, only run the stages till codegen and stop. The output
225  /// will be bitcode.
226  void disableCodeGen(bool Disable) { DisableCodeGen = Disable; }
227
228  /// Perform CodeGen only: disable all other stages.
229  void setCodeGenOnly(bool CGOnly) { CodeGenOnly = CGOnly; }
230
231  /**@}*/
232
233  /**
234   * \defgroup Set of APIs to run individual stages in isolation.
235   * @{
236   */
237
238  /**
239   * Produce the combined summary index from all the bitcode files:
240   * "thin-link".
241   */
242  std::unique_ptr<ModuleSummaryIndex> linkCombinedIndex();
243
244  /**
245   * Perform promotion and renaming of exported internal functions,
246   * and additionally resolve weak and linkonce symbols.
247   * Index is updated to reflect linkage changes from weak resolution.
248   */
249  void promote(Module &Module, ModuleSummaryIndex &Index);
250
251  /**
252   * Compute and emit the imported files for module at \p ModulePath.
253   */
254  static void emitImports(StringRef ModulePath, StringRef OutputName,
255                          ModuleSummaryIndex &Index);
256
257  /**
258   * Perform cross-module importing for the module identified by
259   * ModuleIdentifier.
260   */
261  void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
262
263  /**
264   * Compute the list of summaries needed for importing into module.
265   */
266  static void gatherImportedSummariesForModule(
267      StringRef ModulePath, ModuleSummaryIndex &Index,
268      std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
269
270  /**
271   * Perform internalization. Index is updated to reflect linkage changes.
272   */
273  void internalize(Module &Module, ModuleSummaryIndex &Index);
274
275  /**
276   * Perform post-importing ThinLTO optimizations.
277   */
278  void optimize(Module &Module);
279
280  /**
281   * Perform ThinLTO CodeGen.
282   */
283  std::unique_ptr<MemoryBuffer> codegen(Module &Module);
284
285  /**@}*/
286
287private:
288  /// Helper factory to build a TargetMachine
289  TargetMachineBuilder TMBuilder;
290
291  /// Vector holding the in-memory buffer containing the produced binaries, when
292  /// SavedObjectsDirectoryPath isn't set.
293  std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries;
294
295  /// Path to generated files in the supplied SavedObjectsDirectoryPath if any.
296  std::vector<std::string> ProducedBinaryFiles;
297
298  /// Vector holding the input buffers containing the bitcode modules to
299  /// process.
300  std::vector<ThinLTOBuffer> Modules;
301
302  /// Set of symbols that need to be preserved outside of the set of bitcode
303  /// files.
304  StringSet<> PreservedSymbols;
305
306  /// Set of symbols that are cross-referenced between bitcode files.
307  StringSet<> CrossReferencedSymbols;
308
309  /// Control the caching behavior.
310  CachingOptions CacheOptions;
311
312  /// Path to a directory to save the temporary bitcode files.
313  std::string SaveTempsDir;
314
315  /// Path to a directory to save the generated object files.
316  std::string SavedObjectsDirectoryPath;
317
318  /// Flag to enable/disable CodeGen. When set to true, the process stops after
319  /// optimizations and a bitcode is produced.
320  bool DisableCodeGen = false;
321
322  /// Flag to indicate that only the CodeGen will be performed, no cross-module
323  /// importing or optimization.
324  bool CodeGenOnly = false;
325
326  /// IR Optimization Level [0-3].
327  unsigned OptLevel = 3;
328};
329}
330#endif
331