LLVM  14.0.0git
Magic.cpp
Go to the documentation of this file.
1 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/BinaryFormat/COFF.h"
13 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/Support/Endian.h"
18 
19 #if !defined(_MSC_VER) && !defined(__MINGW32__)
20 #include <unistd.h>
21 #else
22 #include <io.h>
23 #endif
24 
25 using namespace llvm;
26 using namespace llvm::support::endian;
27 using namespace llvm::sys::fs;
28 
29 template <size_t N>
30 static bool startswith(StringRef Magic, const char (&S)[N]) {
31  return Magic.startswith(StringRef(S, N - 1));
32 }
33 
34 /// Identify the magic in magic.
36  if (Magic.size() < 4)
37  return file_magic::unknown;
38  switch ((unsigned char)Magic[0]) {
39  case 0x00: {
40  // COFF bigobj, CL.exe's LTO object file, or short import library file
41  if (startswith(Magic, "\0\0\xFF\xFF")) {
42  size_t MinSize =
44  if (Magic.size() < MinSize)
46 
47  const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
48  if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
50  if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
53  }
54  // Windows resource file
55  if (Magic.size() >= sizeof(COFF::WinResMagic) &&
56  memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
58  // 0x0000 = COFF unknown machine type
59  if (Magic[1] == 0)
61  if (startswith(Magic, "\0asm"))
63  break;
64  }
65 
66  case 0x01:
67  // XCOFF format
68  if (startswith(Magic, "\x01\xDF"))
70  if (startswith(Magic, "\x01\xF7"))
72  break;
73 
74  case 0x03:
75  if (startswith(Magic, "\x03\xF0\x00"))
77  break;
78 
79  case 0xDE: // 0x0B17C0DE = BC wraper
80  if (startswith(Magic, "\xDE\xC0\x17\x0B"))
81  return file_magic::bitcode;
82  break;
83  case 'B':
84  if (startswith(Magic, "BC\xC0\xDE"))
85  return file_magic::bitcode;
86  break;
87  case '!':
88  if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
89  return file_magic::archive;
90  break;
91 
92  case '\177':
93  if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
94  bool Data2MSB = Magic[5] == 2;
95  unsigned high = Data2MSB ? 16 : 17;
96  unsigned low = Data2MSB ? 17 : 16;
97  if (Magic[high] == 0) {
98  switch (Magic[low]) {
99  default:
100  return file_magic::elf;
101  case 1:
103  case 2:
105  case 3:
107  case 4:
108  return file_magic::elf_core;
109  }
110  }
111  // It's still some type of ELF file.
112  return file_magic::elf;
113  }
114  break;
115 
116  case 0xCA:
117  if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
118  startswith(Magic, "\xCA\xFE\xBA\xBF")) {
119  // This is complicated by an overlap with Java class files.
120  // See the Mach-O section in /usr/share/file/magic for details.
121  if (Magic.size() >= 8 && Magic[7] < 43)
123  }
124  break;
125 
126  // The two magic numbers for mach-o are:
127  // 0xfeedface - 32-bit mach-o
128  // 0xfeedfacf - 64-bit mach-o
129  case 0xFE:
130  case 0xCE:
131  case 0xCF: {
132  uint16_t type = 0;
133  if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
134  startswith(Magic, "\xFE\xED\xFA\xCF")) {
135  /* Native endian */
136  size_t MinSize;
137  if (Magic[3] == char(0xCE))
138  MinSize = sizeof(MachO::mach_header);
139  else
140  MinSize = sizeof(MachO::mach_header_64);
141  if (Magic.size() >= MinSize)
142  type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
143  } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
144  startswith(Magic, "\xCF\xFA\xED\xFE")) {
145  /* Reverse endian */
146  size_t MinSize;
147  if (Magic[0] == char(0xCE))
148  MinSize = sizeof(MachO::mach_header);
149  else
150  MinSize = sizeof(MachO::mach_header_64);
151  if (Magic.size() >= MinSize)
152  type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
153  }
154  switch (type) {
155  default:
156  break;
157  case 1:
159  case 2:
161  case 3:
163  case 4:
164  return file_magic::macho_core;
165  case 5:
167  case 6:
169  case 7:
171  case 8:
173  case 9:
175  case 10:
177  case 11:
179  }
180  break;
181  }
182  case 0xF0: // PowerPC Windows
183  case 0x83: // Alpha 32-bit
184  case 0x84: // Alpha 64-bit
185  case 0x66: // MPS R4000 Windows
186  case 0x50: // mc68K
187  case 0x4c: // 80386 Windows
188  case 0xc4: // ARMNT Windows
189  if (Magic[1] == 0x01)
192 
193  case 0x90: // PA-RISC Windows
194  case 0x68: // mc68K Windows
195  if (Magic[1] == 0x02)
197  break;
198 
199  case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
200  // Minidump file.
201  if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
202  uint32_t off = read32le(Magic.data() + 0x3c);
203  // PE/COFF file, either EXE or DLL.
204  if (Magic.substr(off).startswith(
207  }
208  if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
209  return file_magic::pdb;
210  if (startswith(Magic, "MDMP"))
211  return file_magic::minidump;
212  break;
213 
214  case 0x64: // x86-64 or ARM64 Windows.
215  if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
217  break;
218 
219  case 0x2d: // YAML '-'
220  if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
221  return file_magic::tapi_file;
222  break;
223 
224  default:
225  break;
226  }
227  return file_magic::unknown;
228 }
229 
230 std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
231  auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
232  /*RequiresNullTerminator=*/false);
233  if (!FileOrError)
234  return FileOrError.getError();
235 
236  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
237  Result = identify_magic(FileBuffer->getBuffer());
238 
239  return std::error_code();
240 }
MemoryBuffer.h
llvm::file_magic::unknown
@ unknown
Unrecognized file.
Definition: Magic.h:22
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
FileSystem.h
llvm::file_magic
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:20
type
StringRef.h
llvm::file_magic::elf_core
@ elf_core
ELF core image.
Definition: Magic.h:29
llvm::file_magic::pdb
@ pdb
Windows PDB debug info file.
Definition: Magic.h:52
offsetof
#define offsetof(TYPE, MEMBER)
Definition: AMDHSAKernelDescriptor.h:23
llvm::file_magic::macho_dynamic_linker
@ macho_dynamic_linker
The Mach-O dynamic linker.
Definition: Magic.h:37
llvm::file_magic::elf_shared_object
@ elf_shared_object
ELF dynamically linked shared lib.
Definition: Magic.h:28
memcmp
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:888
COFF.h
llvm::file_magic::goff_object
@ goff_object
GOFF object file.
Definition: Magic.h:30
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:30
llvm::COFF::BigObjHeader
Definition: COFF.h:75
llvm::file_magic::minidump
@ minidump
Windows minidump file.
Definition: Magic.h:43
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
llvm::COFF::PEMagic
static const char PEMagic[]
Definition: COFF.h:36
llvm::file_magic::bitcode
@ bitcode
Bitcode file.
Definition: Magic.h:23
llvm::support::endian
Definition: Endian.h:42
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:246
ELF.h
Magic.h
Twine.h
llvm::file_magic::xcoff_object_64
@ xcoff_object_64
64-bit XCOFF object file
Definition: Magic.h:50
llvm::file_magic::macho_preload_executable
@ macho_preload_executable
Mach-O Preloaded Executable.
Definition: Magic.h:35
llvm::file_magic::macho_bundle
@ macho_bundle
Mach-O Bundle file.
Definition: Magic.h:38
llvm::file_magic::macho_core
@ macho_core
Mach-O Core File.
Definition: Magic.h:34
llvm::file_magic::macho_dsym_companion
@ macho_dsym_companion
Mach-O dSYM companion file.
Definition: Magic.h:40
llvm::COFF::WinResMagic
static const char WinResMagic[]
Definition: COFF.h:49
llvm::sys::fs
Definition: UniqueID.h:24
llvm::file_magic::elf_relocatable
@ elf_relocatable
ELF Relocatable object file.
Definition: Magic.h:26
llvm::file_magic::coff_import_library
@ coff_import_library
COFF import library.
Definition: Magic.h:46
llvm::MachO::mach_header_64
Definition: MachO.h:519
llvm::file_magic::pecoff_executable
@ pecoff_executable
PECOFF executable file.
Definition: Magic.h:47
llvm::file_magic::macho_universal_binary
@ macho_universal_binary
Mach-O universal binary.
Definition: Magic.h:42
llvm::file_magic::macho_dynamically_linked_shared_lib
@ macho_dynamically_linked_shared_lib
Mach-O dynlinked shared lib.
Definition: Magic.h:36
llvm::file_magic::macho_dynamically_linked_shared_lib_stub
@ macho_dynamically_linked_shared_lib_stub
Mach-O Shared lib stub.
Definition: Magic.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::file_magic::coff_object
@ coff_object
COFF object file.
Definition: Magic.h:45
Magic
const char Magic[]
Definition: Archive.cpp:41
UUID
std::pair< llvm::MachO::Target, std::string > UUID
Definition: TextStubCommon.h:23
llvm::COFF::BigObjMagic
static const char BigObjMagic[]
Definition: COFF.h:38
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::file_magic::macho_kext_bundle
@ macho_kext_bundle
Mach-O kext bundle file.
Definition: Magic.h:41
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:286
llvm::file_magic::macho_fixed_virtual_memory_shared_lib
@ macho_fixed_virtual_memory_shared_lib
Mach-O Shared Lib, FVM.
Definition: Magic.h:33
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
uint16_t
llvm::file_magic::elf
@ elf
ELF Unknown type.
Definition: Magic.h:25
llvm::file_magic::archive
@ archive
ar style archive file
Definition: Magic.h:24
llvm::file_magic::elf_executable
@ elf_executable
ELF Executable image.
Definition: Magic.h:27
llvm::MachO::mach_header
Definition: MachO.h:509
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition: Endian.h:381
llvm::COFF::ClGlObjMagic
static const char ClGlObjMagic[]
Definition: COFF.h:43
N
#define N
llvm::file_magic::wasm_object
@ wasm_object
WebAssembly Object file.
Definition: Magic.h:51
llvm::file_magic::coff_cl_gl_object
@ coff_cl_gl_object
Microsoft cl.exe's intermediate code file.
Definition: Magic.h:44
llvm::file_magic::macho_executable
@ macho_executable
Mach-O Executable.
Definition: Magic.h:32
MachO.h
llvm::file_magic::tapi_file
@ tapi_file
Text-based Dynamic Library Stub file.
Definition: Magic.h:53
llvm::identify_magic
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:35
Endian.h
llvm::file_magic::windows_resource
@ windows_resource
Windows compiled resource file (.res)
Definition: Magic.h:48
llvm::file_magic::macho_object
@ macho_object
Mach-O Object file.
Definition: Magic.h:31
llvm::file_magic::xcoff_object_32
@ xcoff_object_32
32-bit XCOFF object file
Definition: Magic.h:49