1327952Sdim//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2311116Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6311116Sdim// 7311116Sdim//===----------------------------------------------------------------------===// 8311116Sdim// 9311116Sdim// This header defines interfaces to read LLVM bitcode files/streams. 10311116Sdim// 11311116Sdim//===----------------------------------------------------------------------===// 12311116Sdim 13311116Sdim#ifndef LLVM_BITCODE_BITCODEREADER_H 14311116Sdim#define LLVM_BITCODE_BITCODEREADER_H 15311116Sdim 16327952Sdim#include "llvm/ADT/ArrayRef.h" 17327952Sdim#include "llvm/ADT/StringRef.h" 18353358Sdim#include "llvm/Bitstream/BitCodes.h" 19311116Sdim#include "llvm/IR/ModuleSummaryIndex.h" 20311116Sdim#include "llvm/Support/Endian.h" 21311116Sdim#include "llvm/Support/Error.h" 22311116Sdim#include "llvm/Support/ErrorOr.h" 23311116Sdim#include "llvm/Support/MemoryBuffer.h" 24327952Sdim#include <cstdint> 25311116Sdim#include <memory> 26327952Sdim#include <string> 27327952Sdim#include <system_error> 28327952Sdim#include <vector> 29311116Sdimnamespace llvm { 30311116Sdim 31327952Sdimclass LLVMContext; 32327952Sdimclass Module; 33327952Sdim 34311116Sdim // These functions are for converting Expected/Error values to 35311116Sdim // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 36311116Sdim // Remove these functions once no longer needed by the C and libLTO APIs. 37311116Sdim 38311116Sdim std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 39311116Sdim 40311116Sdim template <typename T> 41311116Sdim ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 42311116Sdim if (!Val) 43311116Sdim return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 44311116Sdim return std::move(*Val); 45311116Sdim } 46311116Sdim 47321369Sdim struct BitcodeFileContents; 48321369Sdim 49321369Sdim /// Basic information extracted from a bitcode module to be used for LTO. 50321369Sdim struct BitcodeLTOInfo { 51321369Sdim bool IsThinLTO; 52321369Sdim bool HasSummary; 53344779Sdim bool EnableSplitLTOUnit; 54321369Sdim }; 55321369Sdim 56311116Sdim /// Represents a module in a bitcode file. 57311116Sdim class BitcodeModule { 58311116Sdim // This covers the identification (if present) and module blocks. 59311116Sdim ArrayRef<uint8_t> Buffer; 60311116Sdim StringRef ModuleIdentifier; 61311116Sdim 62321369Sdim // The string table used to interpret this module. 63321369Sdim StringRef Strtab; 64321369Sdim 65311116Sdim // The bitstream location of the IDENTIFICATION_BLOCK. 66311116Sdim uint64_t IdentificationBit; 67311116Sdim 68311116Sdim // The bitstream location of this module's MODULE_BLOCK. 69311116Sdim uint64_t ModuleBit; 70311116Sdim 71311116Sdim BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 72311116Sdim uint64_t IdentificationBit, uint64_t ModuleBit) 73311116Sdim : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 74311116Sdim IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 75311116Sdim 76311116Sdim // Calls the ctor. 77321369Sdim friend Expected<BitcodeFileContents> 78321369Sdim getBitcodeFileContents(MemoryBufferRef Buffer); 79311116Sdim 80311116Sdim Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context, 81311116Sdim bool MaterializeAll, 82311116Sdim bool ShouldLazyLoadMetadata, 83311116Sdim bool IsImporting); 84311116Sdim 85311116Sdim public: 86311116Sdim StringRef getBuffer() const { 87311116Sdim return StringRef((const char *)Buffer.begin(), Buffer.size()); 88311116Sdim } 89327952Sdim 90321369Sdim StringRef getStrtab() const { return Strtab; } 91311116Sdim 92311116Sdim StringRef getModuleIdentifier() const { return ModuleIdentifier; } 93311116Sdim 94311116Sdim /// Read the bitcode module and prepare for lazy deserialization of function 95311116Sdim /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 96311116Sdim /// If IsImporting is true, this module is being parsed for ThinLTO 97311116Sdim /// importing into another module. 98311116Sdim Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, 99311116Sdim bool ShouldLazyLoadMetadata, 100311116Sdim bool IsImporting); 101311116Sdim 102311116Sdim /// Read the entire bitcode module and return it. 103311116Sdim Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context); 104311116Sdim 105321369Sdim /// Returns information about the module to be used for LTO: whether to 106321369Sdim /// compile with ThinLTO, and whether it has a summary. 107321369Sdim Expected<BitcodeLTOInfo> getLTOInfo(); 108311116Sdim 109311116Sdim /// Parse the specified bitcode buffer, returning the module summary index. 110311116Sdim Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 111321369Sdim 112321369Sdim /// Parse the specified bitcode buffer and merge its module summary index 113321369Sdim /// into CombinedIndex. 114321369Sdim Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 115321369Sdim uint64_t ModuleId); 116311116Sdim }; 117311116Sdim 118321369Sdim struct BitcodeFileContents { 119321369Sdim std::vector<BitcodeModule> Mods; 120321369Sdim StringRef Symtab, StrtabForSymtab; 121321369Sdim }; 122321369Sdim 123321369Sdim /// Returns the contents of a bitcode file. This includes the raw contents of 124321369Sdim /// the symbol table embedded in the bitcode file. Clients which require a 125321369Sdim /// symbol table should prefer to use irsymtab::read instead of this function 126321369Sdim /// because it creates a reader for the irsymtab and handles upgrading bitcode 127321369Sdim /// files without a symbol table or with an old symbol table. 128321369Sdim Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 129321369Sdim 130311116Sdim /// Returns a list of modules in the specified bitcode buffer. 131311116Sdim Expected<std::vector<BitcodeModule>> 132311116Sdim getBitcodeModuleList(MemoryBufferRef Buffer); 133311116Sdim 134311116Sdim /// Read the header of the specified bitcode buffer and prepare for lazy 135311116Sdim /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 136311116Sdim /// lazily load metadata as well. If IsImporting is true, this module is 137311116Sdim /// being parsed for ThinLTO importing into another module. 138311116Sdim Expected<std::unique_ptr<Module>> 139311116Sdim getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 140311116Sdim bool ShouldLazyLoadMetadata = false, 141311116Sdim bool IsImporting = false); 142311116Sdim 143311116Sdim /// Like getLazyBitcodeModule, except that the module takes ownership of 144311116Sdim /// the memory buffer if successful. If successful, this moves Buffer. On 145311116Sdim /// error, this *does not* move Buffer. If IsImporting is true, this module is 146311116Sdim /// being parsed for ThinLTO importing into another module. 147311116Sdim Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 148311116Sdim std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 149311116Sdim bool ShouldLazyLoadMetadata = false, bool IsImporting = false); 150311116Sdim 151311116Sdim /// Read the header of the specified bitcode buffer and extract just the 152311116Sdim /// triple information. If successful, this returns a string. On error, this 153311116Sdim /// returns "". 154311116Sdim Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 155311116Sdim 156311116Sdim /// Return true if \p Buffer contains a bitcode file with ObjC code (category 157311116Sdim /// or class) in it. 158311116Sdim Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 159311116Sdim 160311116Sdim /// Read the header of the specified bitcode buffer and extract just the 161311116Sdim /// producer string information. If successful, this returns a string. On 162311116Sdim /// error, this returns "". 163311116Sdim Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 164311116Sdim 165311116Sdim /// Read the specified bitcode file, returning the module. 166311116Sdim Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, 167311116Sdim LLVMContext &Context); 168311116Sdim 169321369Sdim /// Returns LTO information for the specified bitcode file. 170321369Sdim Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 171311116Sdim 172311116Sdim /// Parse the specified bitcode buffer, returning the module summary index. 173311116Sdim Expected<std::unique_ptr<ModuleSummaryIndex>> 174311116Sdim getModuleSummaryIndex(MemoryBufferRef Buffer); 175311116Sdim 176321369Sdim /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 177321369Sdim Error readModuleSummaryIndex(MemoryBufferRef Buffer, 178321369Sdim ModuleSummaryIndex &CombinedIndex, 179321369Sdim uint64_t ModuleId); 180321369Sdim 181321369Sdim /// Parse the module summary index out of an IR file and return the module 182321369Sdim /// summary index object if found, or an empty summary if not. If Path refers 183321369Sdim /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 184321369Sdim /// this function will return nullptr. 185321369Sdim Expected<std::unique_ptr<ModuleSummaryIndex>> 186321369Sdim getModuleSummaryIndexForFile(StringRef Path, 187321369Sdim bool IgnoreEmptyThinLTOIndexFile = false); 188321369Sdim 189311116Sdim /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 190311116Sdim /// for an LLVM IR bitcode wrapper. 191311116Sdim inline bool isBitcodeWrapper(const unsigned char *BufPtr, 192311116Sdim const unsigned char *BufEnd) { 193311116Sdim // See if you can find the hidden message in the magic bytes :-). 194311116Sdim // (Hint: it's a little-endian encoding.) 195311116Sdim return BufPtr != BufEnd && 196311116Sdim BufPtr[0] == 0xDE && 197311116Sdim BufPtr[1] == 0xC0 && 198311116Sdim BufPtr[2] == 0x17 && 199311116Sdim BufPtr[3] == 0x0B; 200311116Sdim } 201311116Sdim 202311116Sdim /// isRawBitcode - Return true if the given bytes are the magic bytes for 203311116Sdim /// raw LLVM IR bitcode (without a wrapper). 204311116Sdim inline bool isRawBitcode(const unsigned char *BufPtr, 205311116Sdim const unsigned char *BufEnd) { 206311116Sdim // These bytes sort of have a hidden message, but it's not in 207311116Sdim // little-endian this time, and it's a little redundant. 208311116Sdim return BufPtr != BufEnd && 209311116Sdim BufPtr[0] == 'B' && 210311116Sdim BufPtr[1] == 'C' && 211311116Sdim BufPtr[2] == 0xc0 && 212311116Sdim BufPtr[3] == 0xde; 213311116Sdim } 214311116Sdim 215311116Sdim /// isBitcode - Return true if the given bytes are the magic bytes for 216311116Sdim /// LLVM IR bitcode, either with or without a wrapper. 217311116Sdim inline bool isBitcode(const unsigned char *BufPtr, 218311116Sdim const unsigned char *BufEnd) { 219311116Sdim return isBitcodeWrapper(BufPtr, BufEnd) || 220311116Sdim isRawBitcode(BufPtr, BufEnd); 221311116Sdim } 222311116Sdim 223311116Sdim /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 224311116Sdim /// header for padding or other reasons. The format of this header is: 225311116Sdim /// 226311116Sdim /// struct bc_header { 227311116Sdim /// uint32_t Magic; // 0x0B17C0DE 228311116Sdim /// uint32_t Version; // Version, currently always 0. 229311116Sdim /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 230311116Sdim /// uint32_t BitcodeSize; // Size of traditional bitcode file. 231311116Sdim /// ... potentially other gunk ... 232311116Sdim /// }; 233311116Sdim /// 234311116Sdim /// This function is called when we find a file with a matching magic number. 235311116Sdim /// In this case, skip down to the subsection of the file that is actually a 236311116Sdim /// BC file. 237311116Sdim /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 238311116Sdim /// contain the whole bitcode file. 239311116Sdim inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 240311116Sdim const unsigned char *&BufEnd, 241311116Sdim bool VerifyBufferSize) { 242311116Sdim // Must contain the offset and size field! 243311116Sdim if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 244311116Sdim return true; 245311116Sdim 246311116Sdim unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 247311116Sdim unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 248311116Sdim uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 249311116Sdim 250311116Sdim // Verify that Offset+Size fits in the file. 251311116Sdim if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 252311116Sdim return true; 253311116Sdim BufPtr += Offset; 254311116Sdim BufEnd = BufPtr+Size; 255311116Sdim return false; 256311116Sdim } 257311116Sdim 258311116Sdim const std::error_category &BitcodeErrorCategory(); 259311116Sdim enum class BitcodeError { CorruptedBitcode = 1 }; 260311116Sdim inline std::error_code make_error_code(BitcodeError E) { 261311116Sdim return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 262311116Sdim } 263311116Sdim 264327952Sdim} // end namespace llvm 265311116Sdim 266311116Sdimnamespace std { 267327952Sdim 268311116Sdimtemplate <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 269311116Sdim 270327952Sdim} // end namespace std 271327952Sdim 272327952Sdim#endif // LLVM_BITCODE_BITCODEREADER_H 273