LLVM  15.0.0git
Magic.cpp
Go to the documentation of this file.
1 //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/BinaryFormat/COFF.h"
14 #include "llvm/Support/Endian.h"
16 
17 #if !defined(_MSC_VER) && !defined(__MINGW32__)
18 #include <unistd.h>
19 #else
20 #include <io.h>
21 #endif
22 
23 using namespace llvm;
24 using namespace llvm::support::endian;
25 using namespace llvm::sys::fs;
26 
27 template <size_t N>
28 static bool startswith(StringRef Magic, const char (&S)[N]) {
29  return Magic.startswith(StringRef(S, N - 1));
30 }
31 
32 /// Identify a file's type from the leading bytes held in \p Magic.
///
/// Buffers shorter than 4 bytes are reported as file_magic::unknown. Otherwise
/// the first byte selects which signature checks are attempted; input that
/// matches none of them also yields file_magic::unknown.
34  if (Magic.size() < 4)
35  return file_magic::unknown;
// Dispatch on the first byte, read as unsigned so that values >= 0x80 compare
// equal to the hexadecimal case labels below.
36  switch ((unsigned char)Magic[0]) {
37  case 0x00: {
38  // COFF bigobj, CL.exe's LTO object file, or short import library file
39  if (startswith(Magic, "\0\0\xFF\xFF")) {
40  size_t MinSize =
42  if (Magic.size() < MinSize)
44 
// The distinguishing bytes are compared at the offset of the UUID member of
// COFF::BigObjHeader.
45  const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
46  if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
// NOTE(review): the length used here is sizeof(COFF::BigObjMagic) although the
// bytes compared are COFF::ClGlObjMagic - confirm both arrays have the same
// size.
48  if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
51  }
52  // Windows resource file
53  if (Magic.size() >= sizeof(COFF::WinResMagic) &&
54  memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
56  // 0x0000 = COFF unknown machine type
57  if (Magic[1] == 0)
59  if (startswith(Magic, "\0asm"))
61  break;
62  }
63 
64  case 0x01:
65  // XCOFF format
66  if (startswith(Magic, "\x01\xDF"))
68  if (startswith(Magic, "\x01\xF7"))
70  break;
71 
72  case 0x03:
73  if (startswith(Magic, "\x03\xF0\x00"))
75  break;
76 
77  case 0xDE: // 0x0B17C0DE = BC wrapper
78  if (startswith(Magic, "\xDE\xC0\x17\x0B"))
79  return file_magic::bitcode;
80  break;
81  case 'B':
82  if (startswith(Magic, "BC\xC0\xDE"))
83  return file_magic::bitcode;
84  break;
85  case '!':
86  if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
87  return file_magic::archive;
88  break;
89  case '<':
90  if (startswith(Magic, "<bigaf>\n"))
91  return file_magic::archive;
92  break;
93  case '\177':
// At least 18 bytes are required because bytes 16 and 17 are inspected below.
94  if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
// Byte 5 == 2 selects the byte order of the 16-bit field at offsets 16-17
// (presumably the ELF e_type field - confirm): `high` and `low` index its
// most and least significant byte respectively.
95  bool Data2MSB = Magic[5] == 2;
96  unsigned high = Data2MSB ? 16 : 17;
97  unsigned low = Data2MSB ? 17 : 16;
98  if (Magic[high] == 0) {
99  switch (Magic[low]) {
100  default:
101  return file_magic::elf;
102  case 1:
104  case 2:
106  case 3:
108  case 4:
109  return file_magic::elf_core;
110  }
111  }
112  // It's still some type of ELF file.
113  return file_magic::elf;
114  }
115  break;
116 
117  case 0xCA:
118  if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
119  startswith(Magic, "\xCA\xFE\xBA\xBF")) {
120  // This is complicated by an overlap with Java class files.
121  // See the Mach-O section in /usr/share/file/magic for details.
122  if (Magic.size() >= 8 && Magic[7] < 43)
124  }
125  break;
126 
127  // The two magic numbers for mach-o are:
128  // 0xfeedface - 32-bit mach-o
129  // 0xfeedfacf - 64-bit mach-o
130  case 0xFE:
131  case 0xCE:
132  case 0xCF: {
// `type` stays 0 when neither signature matches or the buffer is smaller than
// the corresponding header; 0 matches no case in the switch further down.
133  uint16_t type = 0;
134  if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
135  startswith(Magic, "\xFE\xED\xFA\xCF")) {
136  /* Native endian */
137  size_t MinSize;
138  if (Magic[3] == char(0xCE))
139  MinSize = sizeof(MachO::mach_header);
140  else
141  MinSize = sizeof(MachO::mach_header_64);
142  if (Magic.size() >= MinSize)
// NOTE(review): Magic[13] is shifted by 12 while the neighbouring bytes step
// by 8 (24, 8, 0) - 16 looks intended. The value is also narrowed to the
// 16-bit `type`. Confirm before relying on bytes 12-13.
143  type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
144  } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
145  startswith(Magic, "\xCF\xFA\xED\xFE")) {
146  /* Reverse endian */
147  size_t MinSize;
148  if (Magic[0] == char(0xCE))
149  MinSize = sizeof(MachO::mach_header);
150  else
151  MinSize = sizeof(MachO::mach_header_64);
152  if (Magic.size() >= MinSize)
// NOTE(review): same 12-bit shift as the native-endian branch, applied here
// to Magic[14] - confirm.
153  type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
154  }
155  switch (type) {
156  default:
157  break;
158  case 1:
160  case 2:
162  case 3:
164  case 4:
165  return file_magic::macho_core;
166  case 5:
168  case 6:
170  case 7:
172  case 8:
174  case 9:
176  case 10:
178  case 11:
180  }
181  break;
182  }
183  case 0xF0: // PowerPC Windows
184  case 0x83: // Alpha 32-bit
185  case 0x84: // Alpha 64-bit
186  case 0x66: // MIPS R4000 Windows
187  case 0x50: // mc68K
188  if (startswith(Magic, "\x50\xed\x55\xba"))
191 
192  case 0x4c: // 80386 Windows
193  case 0xc4: // ARMNT Windows
194  if (Magic[1] == 0x01)
197 
198  case 0x90: // PA-RISC Windows
199  case 0x68: // mc68K Windows
200  if (Magic[1] == 0x02)
202  break;
203 
204  case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
205  // Minidump file.
// 0x3c + 4 bytes are needed because a 32-bit little-endian offset is read
// from position 0x3c.
206  if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
207  uint32_t off = read32le(Magic.data() + 0x3c);
208  // PE/COFF file, either EXE or DLL.
209  if (Magic.substr(off).startswith(
212  }
213  if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
214  return file_magic::pdb;
215  if (startswith(Magic, "MDMP"))
216  return file_magic::minidump;
217  break;
218 
219  case 0x64: // x86-64 or ARM64 Windows.
220  if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
222  break;
223 
224  case 0x2d: // YAML '-'
225  if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
226  return file_magic::tapi_file;
227  break;
228 
229  case 'D': // DirectX container file - DXBC
230  if (startswith(Magic, "DXBC"))
232  break;
233 
234  default:
235  break;
236  }
// No signature matched.
237  return file_magic::unknown;
238 }
239 
240 std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
241  auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
242  /*RequiresNullTerminator=*/false);
243  if (!FileOrError)
244  return FileOrError.getError();
245 
246  std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
247  Result = identify_magic(FileBuffer->getBuffer());
248 
249  return std::error_code();
250 }
MemoryBuffer.h
llvm::file_magic::unknown
@ unknown
Unrecognized file.
Definition: Magic.h:22
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::file_magic
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:20
type
llvm::file_magic::cuda_fatbinary
@ cuda_fatbinary
CUDA Fatbinary object file.
Definition: Magic.h:54
StringRef.h
llvm::file_magic::elf_core
@ elf_core
ELF core image.
Definition: Magic.h:29
llvm::file_magic::pdb
@ pdb
Windows PDB debug info file.
Definition: Magic.h:52
offsetof
#define offsetof(TYPE, MEMBER)
Definition: AMDHSAKernelDescriptor.h:23
llvm::file_magic::macho_dynamic_linker
@ macho_dynamic_linker
The Mach-O dynamic linker.
Definition: Magic.h:37
llvm::file_magic::elf_shared_object
@ elf_shared_object
ELF dynamically linked shared lib.
Definition: Magic.h:28
memcmp
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:899
COFF.h
llvm::file_magic::goff_object
@ goff_object
GOFF object file.
Definition: Magic.h:30
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
llvm::COFF::BigObjHeader
Definition: COFF.h:74
llvm::file_magic::minidump
@ minidump
Windows minidump file.
Definition: Magic.h:43
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
llvm::COFF::PEMagic
static const char PEMagic[]
Definition: COFF.h:35
llvm::file_magic::bitcode
@ bitcode
Bitcode file.
Definition: Magic.h:23
llvm::support::endian
Definition: Endian.h:42
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:239
Magic.h
Twine.h
llvm::file_magic::xcoff_object_64
@ xcoff_object_64
64-bit XCOFF object file
Definition: Magic.h:50
llvm::file_magic::macho_preload_executable
@ macho_preload_executable
Mach-O Preloaded Executable.
Definition: Magic.h:35
llvm::file_magic::macho_bundle
@ macho_bundle
Mach-O Bundle file.
Definition: Magic.h:38
llvm::file_magic::macho_core
@ macho_core
Mach-O Core File.
Definition: Magic.h:34
llvm::file_magic::macho_dsym_companion
@ macho_dsym_companion
Mach-O dSYM companion file.
Definition: Magic.h:40
llvm::file_magic::dxcontainer_object
@ dxcontainer_object
DirectX container file.
Definition: Magic.h:55
llvm::COFF::WinResMagic
static const char WinResMagic[]
Definition: COFF.h:48
llvm::sys::fs
Definition: UniqueID.h:24
llvm::file_magic::elf_relocatable
@ elf_relocatable
ELF Relocatable object file.
Definition: Magic.h:26
llvm::file_magic::coff_import_library
@ coff_import_library
COFF import library.
Definition: Magic.h:46
llvm::MachO::mach_header_64
Definition: MachO.h:521
llvm::file_magic::pecoff_executable
@ pecoff_executable
PECOFF executable file.
Definition: Magic.h:47
llvm::file_magic::macho_universal_binary
@ macho_universal_binary
Mach-O universal binary.
Definition: Magic.h:42
llvm::file_magic::macho_dynamically_linked_shared_lib
@ macho_dynamically_linked_shared_lib
Mach-O dynlinked shared lib.
Definition: Magic.h:36
llvm::file_magic::macho_dynamically_linked_shared_lib_stub
@ macho_dynamically_linked_shared_lib_stub
Mach-O Shared lib stub.
Definition: Magic.h:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::file_magic::coff_object
@ coff_object
COFF object file.
Definition: Magic.h:45
UUID
std::pair< llvm::MachO::Target, std::string > UUID
Definition: TextStubCommon.h:23
llvm::COFF::BigObjMagic
static const char BigObjMagic[]
Definition: COFF.h:37
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::msf::Magic
static const char Magic[]
Definition: MSFCommon.h:23
llvm::file_magic::macho_kext_bundle
@ macho_kext_bundle
Mach-O kext bundle file.
Definition: Magic.h:41
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::file_magic::macho_fixed_virtual_memory_shared_lib
@ macho_fixed_virtual_memory_shared_lib
Mach-O Shared Lib, FVM.
Definition: Magic.h:33
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
uint16_t
llvm::file_magic::elf
@ elf
ELF Unknown type.
Definition: Magic.h:25
llvm::file_magic::archive
@ archive
ar style archive file
Definition: Magic.h:24
llvm::file_magic::elf_executable
@ elf_executable
ELF Executable image.
Definition: Magic.h:27
llvm::MachO::mach_header
Definition: MachO.h:511
llvm::support::endian::read32le
uint32_t read32le(const void *P)
Definition: Endian.h:381
llvm::COFF::ClGlObjMagic
static const char ClGlObjMagic[]
Definition: COFF.h:42
N
#define N
llvm::file_magic::wasm_object
@ wasm_object
WebAssembly Object file.
Definition: Magic.h:51
llvm::file_magic::coff_cl_gl_object
@ coff_cl_gl_object
Microsoft cl.exe's intermediate code file.
Definition: Magic.h:44
llvm::file_magic::macho_executable
@ macho_executable
Mach-O Executable.
Definition: Magic.h:32
MachO.h
llvm::file_magic::tapi_file
@ tapi_file
Text-based Dynamic Library Stub file.
Definition: Magic.h:53
llvm::identify_magic
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:33
Endian.h
llvm::file_magic::windows_resource
@ windows_resource
Windows compiled resource file (.res)
Definition: Magic.h:48
llvm::file_magic::macho_object
@ macho_object
Mach-O Object file.
Definition: Magic.h:31
llvm::file_magic::xcoff_object_32
@ xcoff_object_32
32-bit XCOFF object file
Definition: Magic.h:49