1327952Sdim//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2311116Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6311116Sdim//
7311116Sdim//===----------------------------------------------------------------------===//
8311116Sdim//
9311116Sdim// This header defines interfaces to read LLVM bitcode files/streams.
10311116Sdim//
11311116Sdim//===----------------------------------------------------------------------===//
12311116Sdim
13311116Sdim#ifndef LLVM_BITCODE_BITCODEREADER_H
14311116Sdim#define LLVM_BITCODE_BITCODEREADER_H
15311116Sdim
16327952Sdim#include "llvm/ADT/ArrayRef.h"
17327952Sdim#include "llvm/ADT/StringRef.h"
18353358Sdim#include "llvm/Bitstream/BitCodes.h"
19311116Sdim#include "llvm/IR/ModuleSummaryIndex.h"
20311116Sdim#include "llvm/Support/Endian.h"
21311116Sdim#include "llvm/Support/Error.h"
22311116Sdim#include "llvm/Support/ErrorOr.h"
23311116Sdim#include "llvm/Support/MemoryBuffer.h"
24327952Sdim#include <cstdint>
25311116Sdim#include <memory>
26327952Sdim#include <string>
27327952Sdim#include <system_error>
28327952Sdim#include <vector>
29311116Sdimnamespace llvm {
30311116Sdim
31327952Sdimclass LLVMContext;
32327952Sdimclass Module;
33327952Sdim
34311116Sdim  // These functions are for converting Expected/Error values to
35311116Sdim  // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
36311116Sdim  // Remove these functions once no longer needed by the C and libLTO APIs.
37311116Sdim
38311116Sdim  std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
39311116Sdim
40311116Sdim  template <typename T>
41311116Sdim  ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
42311116Sdim    if (!Val)
43311116Sdim      return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
44311116Sdim    return std::move(*Val);
45311116Sdim  }
46311116Sdim
47321369Sdim  struct BitcodeFileContents;
48321369Sdim
49321369Sdim  /// Basic information extracted from a bitcode module to be used for LTO.
50321369Sdim  struct BitcodeLTOInfo {
51321369Sdim    bool IsThinLTO;
52321369Sdim    bool HasSummary;
53344779Sdim    bool EnableSplitLTOUnit;
54321369Sdim  };
55321369Sdim
56311116Sdim  /// Represents a module in a bitcode file.
57311116Sdim  class BitcodeModule {
58311116Sdim    // This covers the identification (if present) and module blocks.
59311116Sdim    ArrayRef<uint8_t> Buffer;
60311116Sdim    StringRef ModuleIdentifier;
61311116Sdim
62321369Sdim    // The string table used to interpret this module.
63321369Sdim    StringRef Strtab;
64321369Sdim
65311116Sdim    // The bitstream location of the IDENTIFICATION_BLOCK.
66311116Sdim    uint64_t IdentificationBit;
67311116Sdim
68311116Sdim    // The bitstream location of this module's MODULE_BLOCK.
69311116Sdim    uint64_t ModuleBit;
70311116Sdim
71311116Sdim    BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
72311116Sdim                  uint64_t IdentificationBit, uint64_t ModuleBit)
73311116Sdim        : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
74311116Sdim          IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
75311116Sdim
76311116Sdim    // Calls the ctor.
77321369Sdim    friend Expected<BitcodeFileContents>
78321369Sdim    getBitcodeFileContents(MemoryBufferRef Buffer);
79311116Sdim
80311116Sdim    Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
81311116Sdim                                                    bool MaterializeAll,
82311116Sdim                                                    bool ShouldLazyLoadMetadata,
83311116Sdim                                                    bool IsImporting);
84311116Sdim
85311116Sdim  public:
86311116Sdim    StringRef getBuffer() const {
87311116Sdim      return StringRef((const char *)Buffer.begin(), Buffer.size());
88311116Sdim    }
89327952Sdim
90321369Sdim    StringRef getStrtab() const { return Strtab; }
91311116Sdim
92311116Sdim    StringRef getModuleIdentifier() const { return ModuleIdentifier; }
93311116Sdim
94311116Sdim    /// Read the bitcode module and prepare for lazy deserialization of function
95311116Sdim    /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
96311116Sdim    /// If IsImporting is true, this module is being parsed for ThinLTO
97311116Sdim    /// importing into another module.
98311116Sdim    Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
99311116Sdim                                                    bool ShouldLazyLoadMetadata,
100311116Sdim                                                    bool IsImporting);
101311116Sdim
102311116Sdim    /// Read the entire bitcode module and return it.
103311116Sdim    Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
104311116Sdim
105321369Sdim    /// Returns information about the module to be used for LTO: whether to
106321369Sdim    /// compile with ThinLTO, and whether it has a summary.
107321369Sdim    Expected<BitcodeLTOInfo> getLTOInfo();
108311116Sdim
109311116Sdim    /// Parse the specified bitcode buffer, returning the module summary index.
110311116Sdim    Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
111321369Sdim
112321369Sdim    /// Parse the specified bitcode buffer and merge its module summary index
113321369Sdim    /// into CombinedIndex.
114321369Sdim    Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
115321369Sdim                      uint64_t ModuleId);
116311116Sdim  };
117311116Sdim
118321369Sdim  struct BitcodeFileContents {
119321369Sdim    std::vector<BitcodeModule> Mods;
120321369Sdim    StringRef Symtab, StrtabForSymtab;
121321369Sdim  };
122321369Sdim
123321369Sdim  /// Returns the contents of a bitcode file. This includes the raw contents of
124321369Sdim  /// the symbol table embedded in the bitcode file. Clients which require a
125321369Sdim  /// symbol table should prefer to use irsymtab::read instead of this function
126321369Sdim  /// because it creates a reader for the irsymtab and handles upgrading bitcode
127321369Sdim  /// files without a symbol table or with an old symbol table.
128321369Sdim  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
129321369Sdim
130311116Sdim  /// Returns a list of modules in the specified bitcode buffer.
131311116Sdim  Expected<std::vector<BitcodeModule>>
132311116Sdim  getBitcodeModuleList(MemoryBufferRef Buffer);
133311116Sdim
134311116Sdim  /// Read the header of the specified bitcode buffer and prepare for lazy
135311116Sdim  /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
136311116Sdim  /// lazily load metadata as well. If IsImporting is true, this module is
137311116Sdim  /// being parsed for ThinLTO importing into another module.
138311116Sdim  Expected<std::unique_ptr<Module>>
139311116Sdim  getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
140311116Sdim                       bool ShouldLazyLoadMetadata = false,
141311116Sdim                       bool IsImporting = false);
142311116Sdim
143311116Sdim  /// Like getLazyBitcodeModule, except that the module takes ownership of
144311116Sdim  /// the memory buffer if successful. If successful, this moves Buffer. On
145311116Sdim  /// error, this *does not* move Buffer. If IsImporting is true, this module is
146311116Sdim  /// being parsed for ThinLTO importing into another module.
147311116Sdim  Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
148311116Sdim      std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
149311116Sdim      bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
150311116Sdim
  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, the returned Expected holds the triple
  /// string. On failure, the error is carried by the returned Expected.
154311116Sdim  Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
155311116Sdim
156311116Sdim  /// Return true if \p Buffer contains a bitcode file with ObjC code (category
157311116Sdim  /// or class) in it.
158311116Sdim  Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
159311116Sdim
  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, the returned Expected holds
  /// the producer string. On failure, the error is carried by the returned
  /// Expected.
163311116Sdim  Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
164311116Sdim
165311116Sdim  /// Read the specified bitcode file, returning the module.
166311116Sdim  Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
167311116Sdim                                                     LLVMContext &Context);
168311116Sdim
169321369Sdim  /// Returns LTO information for the specified bitcode file.
170321369Sdim  Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
171311116Sdim
172311116Sdim  /// Parse the specified bitcode buffer, returning the module summary index.
173311116Sdim  Expected<std::unique_ptr<ModuleSummaryIndex>>
174311116Sdim  getModuleSummaryIndex(MemoryBufferRef Buffer);
175311116Sdim
176321369Sdim  /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
177321369Sdim  Error readModuleSummaryIndex(MemoryBufferRef Buffer,
178321369Sdim                               ModuleSummaryIndex &CombinedIndex,
179321369Sdim                               uint64_t ModuleId);
180321369Sdim
181321369Sdim  /// Parse the module summary index out of an IR file and return the module
182321369Sdim  /// summary index object if found, or an empty summary if not. If Path refers
183321369Sdim  /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
184321369Sdim  /// this function will return nullptr.
185321369Sdim  Expected<std::unique_ptr<ModuleSummaryIndex>>
186321369Sdim  getModuleSummaryIndexForFile(StringRef Path,
187321369Sdim                               bool IgnoreEmptyThinLTOIndexFile = false);
188321369Sdim
189311116Sdim  /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
190311116Sdim  /// for an LLVM IR bitcode wrapper.
191311116Sdim  inline bool isBitcodeWrapper(const unsigned char *BufPtr,
192311116Sdim                               const unsigned char *BufEnd) {
193311116Sdim    // See if you can find the hidden message in the magic bytes :-).
194311116Sdim    // (Hint: it's a little-endian encoding.)
195311116Sdim    return BufPtr != BufEnd &&
196311116Sdim           BufPtr[0] == 0xDE &&
197311116Sdim           BufPtr[1] == 0xC0 &&
198311116Sdim           BufPtr[2] == 0x17 &&
199311116Sdim           BufPtr[3] == 0x0B;
200311116Sdim  }
201311116Sdim
202311116Sdim  /// isRawBitcode - Return true if the given bytes are the magic bytes for
203311116Sdim  /// raw LLVM IR bitcode (without a wrapper).
204311116Sdim  inline bool isRawBitcode(const unsigned char *BufPtr,
205311116Sdim                           const unsigned char *BufEnd) {
206311116Sdim    // These bytes sort of have a hidden message, but it's not in
207311116Sdim    // little-endian this time, and it's a little redundant.
208311116Sdim    return BufPtr != BufEnd &&
209311116Sdim           BufPtr[0] == 'B' &&
210311116Sdim           BufPtr[1] == 'C' &&
211311116Sdim           BufPtr[2] == 0xc0 &&
212311116Sdim           BufPtr[3] == 0xde;
213311116Sdim  }
214311116Sdim
215311116Sdim  /// isBitcode - Return true if the given bytes are the magic bytes for
216311116Sdim  /// LLVM IR bitcode, either with or without a wrapper.
217311116Sdim  inline bool isBitcode(const unsigned char *BufPtr,
218311116Sdim                        const unsigned char *BufEnd) {
219311116Sdim    return isBitcodeWrapper(BufPtr, BufEnd) ||
220311116Sdim           isRawBitcode(BufPtr, BufEnd);
221311116Sdim  }
222311116Sdim
223311116Sdim  /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
224311116Sdim  /// header for padding or other reasons.  The format of this header is:
225311116Sdim  ///
226311116Sdim  /// struct bc_header {
227311116Sdim  ///   uint32_t Magic;         // 0x0B17C0DE
228311116Sdim  ///   uint32_t Version;       // Version, currently always 0.
229311116Sdim  ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
230311116Sdim  ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
231311116Sdim  ///   ... potentially other gunk ...
232311116Sdim  /// };
233311116Sdim  ///
234311116Sdim  /// This function is called when we find a file with a matching magic number.
235311116Sdim  /// In this case, skip down to the subsection of the file that is actually a
236311116Sdim  /// BC file.
237311116Sdim  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
238311116Sdim  /// contain the whole bitcode file.
239311116Sdim  inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
240311116Sdim                                       const unsigned char *&BufEnd,
241311116Sdim                                       bool VerifyBufferSize) {
242311116Sdim    // Must contain the offset and size field!
243311116Sdim    if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
244311116Sdim      return true;
245311116Sdim
246311116Sdim    unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
247311116Sdim    unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
248311116Sdim    uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
249311116Sdim
250311116Sdim    // Verify that Offset+Size fits in the file.
251311116Sdim    if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
252311116Sdim      return true;
253311116Sdim    BufPtr += Offset;
254311116Sdim    BufEnd = BufPtr+Size;
255311116Sdim    return false;
256311116Sdim  }
257311116Sdim
258311116Sdim  const std::error_category &BitcodeErrorCategory();
259311116Sdim  enum class BitcodeError { CorruptedBitcode = 1 };
260311116Sdim  inline std::error_code make_error_code(BitcodeError E) {
261311116Sdim    return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
262311116Sdim  }
263311116Sdim
264327952Sdim} // end namespace llvm
265311116Sdim
266311116Sdimnamespace std {
267327952Sdim
268311116Sdimtemplate <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
269311116Sdim
270327952Sdim} // end namespace std
271327952Sdim
272327952Sdim#endif // LLVM_BITCODE_BITCODEREADER_H
273