Magic.cpp revision 344779
//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9227825Stheraven
10227825Stheraven#include "llvm/BinaryFormat/Magic.h"
11227825Stheraven
12227825Stheraven#include "llvm/BinaryFormat/COFF.h"
13227825Stheraven#include "llvm/BinaryFormat/ELF.h"
14227825Stheraven#include "llvm/BinaryFormat/MachO.h"
15227825Stheraven#include "llvm/Support/Endian.h"
16227825Stheraven#include "llvm/Support/FileSystem.h"
17227825Stheraven#include "llvm/Support/MemoryBuffer.h"
18227825Stheraven
19227825Stheraven#if !defined(_MSC_VER) && !defined(__MINGW32__)
20227825Stheraven#include <unistd.h>
21227825Stheraven#else
22227825Stheraven#include <io.h>
23227825Stheraven#endif
24227825Stheraven
25227825Stheravenusing namespace llvm;
26227825Stheravenusing namespace llvm::support::endian;
27227825Stheravenusing namespace llvm::sys::fs;
28227825Stheraven
29227825Stheraventemplate <size_t N>
30227825Stheravenstatic bool startswith(StringRef Magic, const char (&S)[N]) {
31227825Stheraven  return Magic.startswith(StringRef(S, N - 1));
32227825Stheraven}
33227825Stheraven
34227825Stheraven/// Identify the magic in magic.
35227825Stheravenfile_magic llvm::identify_magic(StringRef Magic) {
36227825Stheraven  if (Magic.size() < 4)
37227825Stheraven    return file_magic::unknown;
38227825Stheraven  switch ((unsigned char)Magic[0]) {
39227825Stheraven  case 0x00: {
40227825Stheraven    // COFF bigobj, CL.exe's LTO object file, or short import library file
41227825Stheraven    if (startswith(Magic, "\0\0\xFF\xFF")) {
42227825Stheraven      size_t MinSize =
43227825Stheraven          offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
44227825Stheraven      if (Magic.size() < MinSize)
45227825Stheraven        return file_magic::coff_import_library;
46227825Stheraven
47227825Stheraven      const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
48227825Stheraven      if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
49227825Stheraven        return file_magic::coff_object;
50227825Stheraven      if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
51227825Stheraven        return file_magic::coff_cl_gl_object;
52227825Stheraven      return file_magic::coff_import_library;
53227825Stheraven    }
54227825Stheraven    // Windows resource file
55227825Stheraven    if (Magic.size() >= sizeof(COFF::WinResMagic) &&
56227825Stheraven        memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
57227825Stheraven      return file_magic::windows_resource;
58227825Stheraven    // 0x0000 = COFF unknown machine type
59227825Stheraven    if (Magic[1] == 0)
60227825Stheraven      return file_magic::coff_object;
61227825Stheraven    if (startswith(Magic, "\0asm"))
62227825Stheraven      return file_magic::wasm_object;
63227825Stheraven    break;
64227825Stheraven  }
65227825Stheraven  case 0xDE: // 0x0B17C0DE = BC wraper
66227825Stheraven    if (startswith(Magic, "\xDE\xC0\x17\x0B"))
67227825Stheraven      return file_magic::bitcode;
68227825Stheraven    break;
69227825Stheraven  case 'B':
70227825Stheraven    if (startswith(Magic, "BC\xC0\xDE"))
71227825Stheraven      return file_magic::bitcode;
72227825Stheraven    break;
73227825Stheraven  case '!':
74227825Stheraven    if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
75227825Stheraven      return file_magic::archive;
76227825Stheraven    break;
77227825Stheraven
78227825Stheraven  case '\177':
79227825Stheraven    if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
80227825Stheraven      bool Data2MSB = Magic[5] == 2;
81227825Stheraven      unsigned high = Data2MSB ? 16 : 17;
82227825Stheraven      unsigned low = Data2MSB ? 17 : 16;
83227825Stheraven      if (Magic[high] == 0) {
84227825Stheraven        switch (Magic[low]) {
85227825Stheraven        default:
86227825Stheraven          return file_magic::elf;
87227825Stheraven        case 1:
88227825Stheraven          return file_magic::elf_relocatable;
89227825Stheraven        case 2:
90227825Stheraven          return file_magic::elf_executable;
91227825Stheraven        case 3:
92227825Stheraven          return file_magic::elf_shared_object;
93227825Stheraven        case 4:
94227825Stheraven          return file_magic::elf_core;
95227825Stheraven        }
96227825Stheraven      }
97227825Stheraven      // It's still some type of ELF file.
98227825Stheraven      return file_magic::elf;
99227825Stheraven    }
100227825Stheraven    break;
101227825Stheraven
102227825Stheraven  case 0xCA:
103227825Stheraven    if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
104227825Stheraven        startswith(Magic, "\xCA\xFE\xBA\xBF")) {
105227825Stheraven      // This is complicated by an overlap with Java class files.
106227825Stheraven      // See the Mach-O section in /usr/share/file/magic for details.
107227825Stheraven      if (Magic.size() >= 8 && Magic[7] < 43)
108227825Stheraven        return file_magic::macho_universal_binary;
109227825Stheraven    }
110227825Stheraven    break;
111227825Stheraven
112227825Stheraven  // The two magic numbers for mach-o are:
113227825Stheraven  // 0xfeedface - 32-bit mach-o
114227825Stheraven  // 0xfeedfacf - 64-bit mach-o
115227825Stheraven  case 0xFE:
116227825Stheraven  case 0xCE:
117227825Stheraven  case 0xCF: {
118227825Stheraven    uint16_t type = 0;
119227825Stheraven    if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
120227825Stheraven        startswith(Magic, "\xFE\xED\xFA\xCF")) {
121227825Stheraven      /* Native endian */
122227825Stheraven      size_t MinSize;
123227825Stheraven      if (Magic[3] == char(0xCE))
124227825Stheraven        MinSize = sizeof(MachO::mach_header);
125227825Stheraven      else
126227825Stheraven        MinSize = sizeof(MachO::mach_header_64);
127227825Stheraven      if (Magic.size() >= MinSize)
128227825Stheraven        type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
129227825Stheraven    } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
130227825Stheraven               startswith(Magic, "\xCF\xFA\xED\xFE")) {
131227825Stheraven      /* Reverse endian */
132227825Stheraven      size_t MinSize;
133227825Stheraven      if (Magic[0] == char(0xCE))
134227825Stheraven        MinSize = sizeof(MachO::mach_header);
135227825Stheraven      else
136227825Stheraven        MinSize = sizeof(MachO::mach_header_64);
137227825Stheraven      if (Magic.size() >= MinSize)
138227825Stheraven        type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
139227825Stheraven    }
140227825Stheraven    switch (type) {
141227825Stheraven    default:
142227825Stheraven      break;
143227825Stheraven    case 1:
144227825Stheraven      return file_magic::macho_object;
145227825Stheraven    case 2:
146227825Stheraven      return file_magic::macho_executable;
147227825Stheraven    case 3:
148227825Stheraven      return file_magic::macho_fixed_virtual_memory_shared_lib;
149227825Stheraven    case 4:
150227825Stheraven      return file_magic::macho_core;
151227825Stheraven    case 5:
152227825Stheraven      return file_magic::macho_preload_executable;
153227825Stheraven    case 6:
154227825Stheraven      return file_magic::macho_dynamically_linked_shared_lib;
155227825Stheraven    case 7:
156227825Stheraven      return file_magic::macho_dynamic_linker;
157227825Stheraven    case 8:
158227825Stheraven      return file_magic::macho_bundle;
159227825Stheraven    case 9:
160227825Stheraven      return file_magic::macho_dynamically_linked_shared_lib_stub;
161227825Stheraven    case 10:
162227825Stheraven      return file_magic::macho_dsym_companion;
163227825Stheraven    case 11:
164227825Stheraven      return file_magic::macho_kext_bundle;
165227825Stheraven    }
166227825Stheraven    break;
167227825Stheraven  }
168227825Stheraven  case 0xF0: // PowerPC Windows
169227825Stheraven  case 0x83: // Alpha 32-bit
170227825Stheraven  case 0x84: // Alpha 64-bit
171227825Stheraven  case 0x66: // MPS R4000 Windows
172227825Stheraven  case 0x50: // mc68K
173227825Stheraven  case 0x4c: // 80386 Windows
174227825Stheraven  case 0xc4: // ARMNT Windows
175227825Stheraven    if (Magic[1] == 0x01)
176227825Stheraven      return file_magic::coff_object;
177227825Stheraven    LLVM_FALLTHROUGH;
178227825Stheraven
179227825Stheraven  case 0x90: // PA-RISC Windows
180227825Stheraven  case 0x68: // mc68K Windows
181227825Stheraven    if (Magic[1] == 0x02)
182227825Stheraven      return file_magic::coff_object;
183227825Stheraven    break;
184227825Stheraven
185227825Stheraven  case 'M': // Possible MS-DOS stub on Windows PE file or MSF/PDB file.
186227825Stheraven    if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
187227825Stheraven      uint32_t off = read32le(Magic.data() + 0x3c);
188227825Stheraven      // PE/COFF file, either EXE or DLL.
189227825Stheraven      if (Magic.substr(off).startswith(
190227825Stheraven              StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
191227825Stheraven        return file_magic::pecoff_executable;
192227825Stheraven    }
193227825Stheraven    if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
194227825Stheraven      return file_magic::pdb;
195227825Stheraven    break;
196227825Stheraven
197227825Stheraven  case 0x64: // x86-64 or ARM64 Windows.
198227825Stheraven    if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
199227825Stheraven      return file_magic::coff_object;
200227825Stheraven    break;
201227825Stheraven
202227825Stheraven  default:
203227825Stheraven    break;
204227825Stheraven  }
205227825Stheraven  return file_magic::unknown;
206227825Stheraven}
207227825Stheraven
208227825Stheravenstd::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
209262801Sdim  auto FileOrError = MemoryBuffer::getFile(Path, -1LL, false);
210262801Sdim  if (!FileOrError)
211262801Sdim    return FileOrError.getError();
212262801Sdim
213227825Stheraven  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
214262801Sdim  Result = identify_magic(FileBuffer->getBuffer());
215227825Stheraven
216227825Stheraven  return std::error_code();
217227825Stheraven}
218227825Stheraven