LLVM  17.0.0git
X86Disassembler.cpp
Go to the documentation of this file.
1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32 // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52 // the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75 
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
86 #include "llvm/MC/TargetRegistry.h"
87 #include "llvm/Support/Debug.h"
88 #include "llvm/Support/Format.h"
90 
91 using namespace llvm;
92 using namespace llvm::X86Disassembler;
93 
94 #define DEBUG_TYPE "x86-disassembler"
95 
96 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97 
98 // Specifies whether a ModR/M byte is needed and (if so) which
99 // instruction each possible value of the ModR/M byte corresponds to. Once
100 // this information is known, we have narrowed down to a single instruction.
102  uint8_t modrm_type;
104 };
105 
106 // Specifies which set of ModR/M->instruction tables to look at
107 // given a particular opcode.
109  ModRMDecision modRMDecisions[256];
110 };
111 
112 // Specifies which opcode->instruction tables to look at given
113 // a particular context (set of attributes). Since there are many possible
114 // contexts, the decoder first uses CONTEXTS_SYM to determine which context
115 // applies given a specific set of attributes. Hence there are only IC_max
116 // entries in this table, rather than 2^(ATTR_max).
118  OpcodeDecision opcodeDecisions[IC_max];
119 };
120 
121 #include "X86GenDisassemblerTables.inc"
122 
124  uint8_t opcode, uint8_t modRM) {
125  const struct ModRMDecision *dec;
126 
127  switch (type) {
128  case ONEBYTE:
129  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130  break;
131  case TWOBYTE:
132  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133  break;
134  case THREEBYTE_38:
135  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136  break;
137  case THREEBYTE_3A:
138  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139  break;
140  case XOP8_MAP:
141  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142  break;
143  case XOP9_MAP:
144  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145  break;
146  case XOPA_MAP:
147  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148  break;
149  case THREEDNOW_MAP:
150  dec =
151  &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152  break;
153  case MAP5:
154  dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155  break;
156  case MAP6:
157  dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158  break;
159  }
160 
161  switch (dec->modrm_type) {
162  default:
163  llvm_unreachable("Corrupt table! Unknown modrm_type");
164  return 0;
165  case MODRM_ONEENTRY:
166  return modRMTable[dec->instructionIDs];
167  case MODRM_SPLITRM:
168  if (modFromModRM(modRM) == 0x3)
169  return modRMTable[dec->instructionIDs + 1];
170  return modRMTable[dec->instructionIDs];
171  case MODRM_SPLITREG:
172  if (modFromModRM(modRM) == 0x3)
173  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
174  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
175  case MODRM_SPLITMISC:
176  if (modFromModRM(modRM) == 0x3)
177  return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
178  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
179  case MODRM_FULL:
180  return modRMTable[dec->instructionIDs + modRM];
181  }
182 }
183 
184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
185  uint64_t offset = insn->readerCursor - insn->startLocation;
186  if (offset >= insn->bytes.size())
187  return true;
188  byte = insn->bytes[offset];
189  return false;
190 }
191 
192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
193  auto r = insn->bytes;
194  uint64_t offset = insn->readerCursor - insn->startLocation;
195  if (offset + sizeof(T) > r.size())
196  return true;
197  ptr = support::endian::read<T>(&r[offset], support::little);
198  insn->readerCursor += sizeof(T);
199  return false;
200 }
201 
202 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
203  return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
204 }
205 
206 // Consumes all of an instruction's prefix bytes, and marks the
207 // instruction as having them. Also sets the instruction's default operand,
208 // address, and other relevant data sizes to report operands correctly.
209 //
210 // insn must not be empty.
211 static int readPrefixes(struct InternalInstruction *insn) {
212  bool isPrefix = true;
213  uint8_t byte = 0;
214  uint8_t nextByte;
215 
216  LLVM_DEBUG(dbgs() << "readPrefixes()");
217 
218  while (isPrefix) {
219  // If we fail reading prefixes, just stop here and let the opcode reader
220  // deal with it.
221  if (consume(insn, byte))
222  break;
223 
224  // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
225  // break and let it be disassembled as a normal "instruction".
226  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
227  break;
228 
229  if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
230  // If the byte is 0xf2 or 0xf3, and any of the following conditions are
231  // met:
232  // - it is followed by a LOCK (0xf0) prefix
233  // - it is followed by an xchg instruction
234  // then it should be disassembled as a xacquire/xrelease not repne/rep.
235  if (((nextByte == 0xf0) ||
236  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
237  insn->xAcquireRelease = true;
238  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
239  break;
240  }
241  // Also if the byte is 0xf3, and the following condition is met:
242  // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
243  // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
244  // then it should be disassembled as an xrelease not rep.
245  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
246  nextByte == 0xc6 || nextByte == 0xc7)) {
247  insn->xAcquireRelease = true;
248  break;
249  }
250  if (isREX(insn, nextByte)) {
251  uint8_t nnextByte;
252  // Go to REX prefix after the current one
253  if (consume(insn, nnextByte))
254  return -1;
255  // We should be able to read next byte after REX prefix
256  if (peek(insn, nnextByte))
257  return -1;
258  --insn->readerCursor;
259  }
260  }
261 
262  switch (byte) {
263  case 0xf0: // LOCK
264  insn->hasLockPrefix = true;
265  break;
266  case 0xf2: // REPNE/REPNZ
267  case 0xf3: { // REP or REPE/REPZ
268  uint8_t nextByte;
269  if (peek(insn, nextByte))
270  break;
271  // TODO:
272  // 1. There could be several 0x66
273  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
274  // it's not mandatory prefix
275  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
276  // 0x0f exactly after it to be mandatory prefix
277  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
278  // The last of 0xf2 /0xf3 is mandatory prefix
279  insn->mandatoryPrefix = byte;
280  insn->repeatPrefix = byte;
281  break;
282  }
283  case 0x2e: // CS segment override -OR- Branch not taken
285  break;
286  case 0x36: // SS segment override -OR- Branch taken
288  break;
289  case 0x3e: // DS segment override
291  break;
292  case 0x26: // ES segment override
294  break;
295  case 0x64: // FS segment override
297  break;
298  case 0x65: // GS segment override
300  break;
301  case 0x66: { // Operand-size override {
302  uint8_t nextByte;
303  insn->hasOpSize = true;
304  if (peek(insn, nextByte))
305  break;
306  // 0x66 can't overwrite existing mandatory prefix and should be ignored
307  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
308  insn->mandatoryPrefix = byte;
309  break;
310  }
311  case 0x67: // Address-size override
312  insn->hasAdSize = true;
313  break;
314  default: // Not a prefix byte
315  isPrefix = false;
316  break;
317  }
318 
319  if (isPrefix)
320  LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
321  }
322 
324 
325  if (byte == 0x62) {
326  uint8_t byte1, byte2;
327  if (consume(insn, byte1)) {
328  LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
329  return -1;
330  }
331 
332  if (peek(insn, byte2)) {
333  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
334  return -1;
335  }
336 
337  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
338  ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
340  } else {
341  --insn->readerCursor; // unconsume byte1
342  --insn->readerCursor; // unconsume byte
343  }
344 
345  if (insn->vectorExtensionType == TYPE_EVEX) {
346  insn->vectorExtensionPrefix[0] = byte;
347  insn->vectorExtensionPrefix[1] = byte1;
348  if (consume(insn, insn->vectorExtensionPrefix[2])) {
349  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
350  return -1;
351  }
352  if (consume(insn, insn->vectorExtensionPrefix[3])) {
353  LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
354  return -1;
355  }
356 
357  // We simulate the REX prefix for simplicity's sake
358  if (insn->mode == MODE_64BIT) {
359  insn->rexPrefix = 0x40 |
360  (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
361  (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
362  (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
363  (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
364  }
365 
366  LLVM_DEBUG(
367  dbgs() << format(
368  "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
369  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
370  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
371  }
372  } else if (byte == 0xc4) {
373  uint8_t byte1;
374  if (peek(insn, byte1)) {
375  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
376  return -1;
377  }
378 
379  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
381  else
382  --insn->readerCursor;
383 
384  if (insn->vectorExtensionType == TYPE_VEX_3B) {
385  insn->vectorExtensionPrefix[0] = byte;
386  consume(insn, insn->vectorExtensionPrefix[1]);
387  consume(insn, insn->vectorExtensionPrefix[2]);
388 
389  // We simulate the REX prefix for simplicity's sake
390 
391  if (insn->mode == MODE_64BIT)
392  insn->rexPrefix = 0x40 |
393  (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
394  (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
395  (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
396  (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
397 
398  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
399  insn->vectorExtensionPrefix[0],
400  insn->vectorExtensionPrefix[1],
401  insn->vectorExtensionPrefix[2]));
402  }
403  } else if (byte == 0xc5) {
404  uint8_t byte1;
405  if (peek(insn, byte1)) {
406  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
407  return -1;
408  }
409 
410  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
412  else
413  --insn->readerCursor;
414 
415  if (insn->vectorExtensionType == TYPE_VEX_2B) {
416  insn->vectorExtensionPrefix[0] = byte;
417  consume(insn, insn->vectorExtensionPrefix[1]);
418 
419  if (insn->mode == MODE_64BIT)
420  insn->rexPrefix =
421  0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
422 
423  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
424  default:
425  break;
426  case VEX_PREFIX_66:
427  insn->hasOpSize = true;
428  break;
429  }
430 
431  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
432  insn->vectorExtensionPrefix[0],
433  insn->vectorExtensionPrefix[1]));
434  }
435  } else if (byte == 0x8f) {
436  uint8_t byte1;
437  if (peek(insn, byte1)) {
438  LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
439  return -1;
440  }
441 
442  if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
444  else
445  --insn->readerCursor;
446 
447  if (insn->vectorExtensionType == TYPE_XOP) {
448  insn->vectorExtensionPrefix[0] = byte;
449  consume(insn, insn->vectorExtensionPrefix[1]);
450  consume(insn, insn->vectorExtensionPrefix[2]);
451 
452  // We simulate the REX prefix for simplicity's sake
453 
454  if (insn->mode == MODE_64BIT)
455  insn->rexPrefix = 0x40 |
456  (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
457  (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
458  (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
459  (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
460 
461  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
462  default:
463  break;
464  case VEX_PREFIX_66:
465  insn->hasOpSize = true;
466  break;
467  }
468 
469  LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
470  insn->vectorExtensionPrefix[0],
471  insn->vectorExtensionPrefix[1],
472  insn->vectorExtensionPrefix[2]));
473  }
474  } else if (isREX(insn, byte)) {
475  if (peek(insn, nextByte))
476  return -1;
477  insn->rexPrefix = byte;
478  LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
479  } else
480  --insn->readerCursor;
481 
482  if (insn->mode == MODE_16BIT) {
483  insn->registerSize = (insn->hasOpSize ? 4 : 2);
484  insn->addressSize = (insn->hasAdSize ? 4 : 2);
485  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
486  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
487  } else if (insn->mode == MODE_32BIT) {
488  insn->registerSize = (insn->hasOpSize ? 2 : 4);
489  insn->addressSize = (insn->hasAdSize ? 2 : 4);
490  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
491  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
492  } else if (insn->mode == MODE_64BIT) {
493  insn->displacementSize = 4;
494  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
495  insn->registerSize = 8;
496  insn->addressSize = (insn->hasAdSize ? 4 : 8);
497  insn->immediateSize = 4;
498  insn->hasOpSize = false;
499  } else {
500  insn->registerSize = (insn->hasOpSize ? 2 : 4);
501  insn->addressSize = (insn->hasAdSize ? 4 : 8);
502  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
503  }
504  }
505 
506  return 0;
507 }
508 
509 // Consumes the SIB byte to determine addressing information.
510 static int readSIB(struct InternalInstruction *insn) {
511  SIBBase sibBaseBase = SIB_BASE_NONE;
512  uint8_t index, base;
513 
514  LLVM_DEBUG(dbgs() << "readSIB()");
515  switch (insn->addressSize) {
516  case 2:
517  default:
518  llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
519  case 4:
520  insn->sibIndexBase = SIB_INDEX_EAX;
521  sibBaseBase = SIB_BASE_EAX;
522  break;
523  case 8:
524  insn->sibIndexBase = SIB_INDEX_RAX;
525  sibBaseBase = SIB_BASE_RAX;
526  break;
527  }
528 
529  if (consume(insn, insn->sib))
530  return -1;
531 
532  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
533 
534  if (index == 0x4) {
535  insn->sibIndex = SIB_INDEX_NONE;
536  } else {
537  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
538  }
539 
540  insn->sibScale = 1 << scaleFromSIB(insn->sib);
541 
542  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
543 
544  switch (base) {
545  case 0x5:
546  case 0xd:
547  switch (modFromModRM(insn->modRM)) {
548  case 0x0:
549  insn->eaDisplacement = EA_DISP_32;
550  insn->sibBase = SIB_BASE_NONE;
551  break;
552  case 0x1:
553  insn->eaDisplacement = EA_DISP_8;
554  insn->sibBase = (SIBBase)(sibBaseBase + base);
555  break;
556  case 0x2:
557  insn->eaDisplacement = EA_DISP_32;
558  insn->sibBase = (SIBBase)(sibBaseBase + base);
559  break;
560  default:
561  llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
562  }
563  break;
564  default:
565  insn->sibBase = (SIBBase)(sibBaseBase + base);
566  break;
567  }
568 
569  return 0;
570 }
571 
572 static int readDisplacement(struct InternalInstruction *insn) {
573  int8_t d8;
574  int16_t d16;
575  int32_t d32;
576  LLVM_DEBUG(dbgs() << "readDisplacement()");
577 
578  insn->displacementOffset = insn->readerCursor - insn->startLocation;
579  switch (insn->eaDisplacement) {
580  case EA_DISP_NONE:
581  break;
582  case EA_DISP_8:
583  if (consume(insn, d8))
584  return -1;
585  insn->displacement = d8;
586  break;
587  case EA_DISP_16:
588  if (consume(insn, d16))
589  return -1;
590  insn->displacement = d16;
591  break;
592  case EA_DISP_32:
593  if (consume(insn, d32))
594  return -1;
595  insn->displacement = d32;
596  break;
597  }
598 
599  return 0;
600 }
601 
602 // Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
603 static int readModRM(struct InternalInstruction *insn) {
 // Reads the ModR/M byte at most once per instruction (guarded by
 // insn->consumedModRM), derives insn->reg and insn->eaBase from it, and
 // reads any SIB byte / displacement the addressing form calls for.
 // Returns 0 on success and -1 if a required byte could not be read.
604  uint8_t mod, rm, reg, evexrm;
605  LLVM_DEBUG(dbgs() << "readModRM()");
606 
 // Already decoded on an earlier call: nothing more to do.
607  if (insn->consumedModRM)
608  return 0;
609 
610  if (consume(insn, insn->modRM))
611  return -1;
612  insn->consumedModRM = true;
613 
614  mod = modFromModRM(insn->modRM);
615  rm = rmFromModRM(insn->modRM);
616  reg = regFromModRM(insn->modRM);
617 
618  // This goes by insn->registerSize to pick the correct register, which messes
619  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
620  // fixupReg().
621  switch (insn->registerSize) {
622  case 2:
623  insn->regBase = MODRM_REG_AX;
624  insn->eaRegBase = EA_REG_AX;
625  break;
626  case 4:
627  insn->regBase = MODRM_REG_EAX;
628  insn->eaRegBase = EA_REG_EAX;
629  break;
630  case 8:
631  insn->regBase = MODRM_REG_RAX;
632  insn->eaRegBase = EA_REG_RAX;
633  break;
634  }
635 
 // The REX R and B bits supply bit 3 of the reg and rm numbers.
636  reg |= rFromREX(insn->rexPrefix) << 3;
637  rm |= bFromREX(insn->rexPrefix) << 3;
638 
 // For EVEX in 64-bit mode, bit 4 of reg comes from the second prefix byte;
 // evexrm carries bit 4 for a register-direct rm and is only applied in the
 // mod == 3 case of the 32/64-bit address-size path below.
639  evexrm = 0;
640  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
641  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
642  evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
643  }
644 
645  insn->reg = (Reg)(insn->regBase + reg);
646 
647  switch (insn->addressSize) {
648  case 2: {
 // 16-bit addressing: rm indexes the base list starting at EA_BASE_BX_SI.
649  EABase eaBaseBase = EA_BASE_BX_SI;
650 
651  switch (mod) {
652  case 0x0:
653  if (rm == 0x6) {
 // mod == 0 with rm == 6: no base register, 16-bit displacement only.
654  insn->eaBase = EA_BASE_NONE;
655  insn->eaDisplacement = EA_DISP_16;
656  if (readDisplacement(insn))
657  return -1;
658  } else {
659  insn->eaBase = (EABase)(eaBaseBase + rm);
 // NOTE(review): the listing numbering skips 660 here, so a source line
 // may be missing from this branch - confirm against upstream.
661  }
662  break;
663  case 0x1:
664  insn->eaBase = (EABase)(eaBaseBase + rm);
665  insn->eaDisplacement = EA_DISP_8;
666  insn->displacementSize = 1;
667  if (readDisplacement(insn))
668  return -1;
669  break;
670  case 0x2:
671  insn->eaBase = (EABase)(eaBaseBase + rm);
672  insn->eaDisplacement = EA_DISP_16;
673  if (readDisplacement(insn))
674  return -1;
675  break;
676  case 0x3:
 // mod == 3: the operand is a register taken from the eaRegBase family.
677  insn->eaBase = (EABase)(insn->eaRegBase + rm);
678  if (readDisplacement(insn))
679  return -1;
680  break;
681  }
682  break;
683  }
684  case 4:
685  case 8: {
686  EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
687 
688  switch (mod) {
689  case 0x0:
690  insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
691  // In determining whether RIP-relative mode is used (rm=5),
692  // or whether a SIB byte is present (rm=4),
693  // the extension bits (REX.b and EVEX.x) are ignored.
694  switch (rm & 7) {
695  case 0x4: // SIB byte is present
696  insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
697  if (readSIB(insn) || readDisplacement(insn))
698  return -1;
699  break;
700  case 0x5: // RIP-relative
701  insn->eaBase = EA_BASE_NONE;
702  insn->eaDisplacement = EA_DISP_32;
703  if (readDisplacement(insn))
704  return -1;
705  break;
706  default:
707  insn->eaBase = (EABase)(eaBaseBase + rm);
708  break;
709  }
710  break;
711  case 0x1:
712  insn->displacementSize = 1;
713  [[fallthrough]];
714  case 0x2:
 // mod == 1 uses an 8-bit displacement, mod == 2 a 32-bit one.
715  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
716  switch (rm & 7) {
717  case 0x4: // SIB byte is present
 // NOTE(review): unlike the mod == 0 case above, this always uses
 // EA_BASE_sib regardless of address size - confirm that is intended.
718  insn->eaBase = EA_BASE_sib;
719  if (readSIB(insn) || readDisplacement(insn))
720  return -1;
721  break;
722  default:
723  insn->eaBase = (EABase)(eaBaseBase + rm);
724  if (readDisplacement(insn))
725  return -1;
726  break;
727  }
728  break;
729  case 0x3:
 // NOTE(review): the listing numbering skips 730 here, so a source line
 // may be missing from this case - confirm against upstream.
 // Register-direct operand; evexrm contributes bit 4 of the register number.
731  insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
732  break;
733  }
734  break;
735  }
736  } // switch (insn->addressSize)
737 
738  return 0;
739 }
740 
// Generates a static function `name` that maps a raw register index to the
// enumerator for the register class named by `type`.
//
//   name   - identifier of the generated function.
//   base   - expression added to the index for TYPE_Rv operands.
//   prefix - enumerator prefix pasted onto register names (prefix##_AL, ...).
//   mask   - mask applied to the index for the 8/16/32/64-bit GPR classes.
//
// The generated function sets *valid to 1, or to 0 when the index is out of
// range for the class or the type is not handled, and returns the enumerator
// value (0 for an unhandled type).
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)                           \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      /* With a REX prefix, indices 4-7 count from prefix##_SPL instead. */    \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      return prefix##_AL + index;                                              \
    case TYPE_R16:                                                             \
    case TYPE_R32:                                                             \
    case TYPE_R64:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      if (type == TYPE_R16)                                                    \
        return prefix##_AX + index;                                            \
      if (type == TYPE_R32)                                                    \
        return prefix##_EAX + index;                                           \
      return prefix##_RAX + index;                                             \
    case TYPE_XMM:                                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_YMM:                                                             \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_ZMM:                                                             \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      return prefix##_CR0 + index;                                             \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    }                                                                          \
  }
813 
814 // Consult an operand type to determine the meaning of the reg or R/M field. If
815 // the operand is an XMM operand, for example, an operand would be XMM0 instead
816 // of AX, which readModRM() would otherwise misinterpret it as.
817 //
818 // @param insn - The instruction containing the operand.
819 // @param type - The operand type.
820 // @param index - The existing value of the field as reported by readModRM().
821 // @param valid - The address of a uint8_t. The target is set to 1 if the
822 // field is valid for the register class; 0 if not.
823 // @return - The proper value.
824 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
825 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
826 
827 // Consult an operand specifier to determine which of the fixup*Value functions
828 // to use in correcting readModRM()'s interpretation.
829 //
830 // @param insn - See fixup*Value().
831 // @param op - The operand specifier.
832 // @return - 0 if fixup was successful; -1 if the register returned was
833 // invalid for its class.
834 static int fixupReg(struct InternalInstruction *insn,
835  const struct OperandSpecifier *op) {
 // Set by fixupRegValue()/fixupRMValue(): 1 if the register index is valid
 // for the operand's register class, 0 otherwise.
836  uint8_t valid;
837  LLVM_DEBUG(dbgs() << "fixupReg()");
838 
839  switch ((OperandEncoding)op->encoding) {
840  default:
841  debug("Expected a REG or R/M encoding in fixupReg");
842  return -1;
843  case ENCODING_VVVV:
 // vvvv is passed as a raw index (no base is subtracted first).
844  insn->vvvv =
845  (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
846  if (!valid)
847  return -1;
848  break;
849  case ENCODING_REG:
 // insn->reg was stored as regBase + index by readModRM(); recover the
 // index before re-mapping it for the operand's actual type.
850  insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
851  insn->reg - insn->regBase, &valid);
852  if (!valid)
853  return -1;
854  break;
855  case ENCODING_SIB:
 // NOTE(review): the listing numbering skips 856 here, and the error message
 // above says R/M encodings are expected, yet no R/M case label is visible -
 // a case label was probably dropped; confirm against upstream.
 // Only a register operand (eaBase at or above eaRegBase) is re-mapped.
857  if (insn->eaBase >= insn->eaRegBase) {
858  insn->eaBase = (EABase)fixupRMValue(
859  insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
860  if (!valid)
861  return -1;
862  }
863  break;
864  }
865 
866  return 0;
867 }
868 
869 // Read the opcode (except the ModR/M byte in the case of extended or escape
870 // opcodes).
871 static bool readOpcode(struct InternalInstruction *insn) {
872  uint8_t current;
873  LLVM_DEBUG(dbgs() << "readOpcode()");
874 
875  insn->opcodeType = ONEBYTE;
876  if (insn->vectorExtensionType == TYPE_EVEX) {
877  switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
878  default:
879  LLVM_DEBUG(
880  dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
882  return true;
883  case VEX_LOB_0F:
884  insn->opcodeType = TWOBYTE;
885  return consume(insn, insn->opcode);
886  case VEX_LOB_0F38:
887  insn->opcodeType = THREEBYTE_38;
888  return consume(insn, insn->opcode);
889  case VEX_LOB_0F3A:
890  insn->opcodeType = THREEBYTE_3A;
891  return consume(insn, insn->opcode);
892  case VEX_LOB_MAP5:
893  insn->opcodeType = MAP5;
894  return consume(insn, insn->opcode);
895  case VEX_LOB_MAP6:
896  insn->opcodeType = MAP6;
897  return consume(insn, insn->opcode);
898  }
899  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
900  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
901  default:
902  LLVM_DEBUG(
903  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
905  return true;
906  case VEX_LOB_0F:
907  insn->opcodeType = TWOBYTE;
908  return consume(insn, insn->opcode);
909  case VEX_LOB_0F38:
910  insn->opcodeType = THREEBYTE_38;
911  return consume(insn, insn->opcode);
912  case VEX_LOB_0F3A:
913  insn->opcodeType = THREEBYTE_3A;
914  return consume(insn, insn->opcode);
915  case VEX_LOB_MAP5:
916  insn->opcodeType = MAP5;
917  return consume(insn, insn->opcode);
918  case VEX_LOB_MAP6:
919  insn->opcodeType = MAP6;
920  return consume(insn, insn->opcode);
921  }
922  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
923  insn->opcodeType = TWOBYTE;
924  return consume(insn, insn->opcode);
925  } else if (insn->vectorExtensionType == TYPE_XOP) {
926  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
927  default:
928  LLVM_DEBUG(
929  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
931  return true;
932  case XOP_MAP_SELECT_8:
933  insn->opcodeType = XOP8_MAP;
934  return consume(insn, insn->opcode);
935  case XOP_MAP_SELECT_9:
936  insn->opcodeType = XOP9_MAP;
937  return consume(insn, insn->opcode);
938  case XOP_MAP_SELECT_A:
939  insn->opcodeType = XOPA_MAP;
940  return consume(insn, insn->opcode);
941  }
942  }
943 
944  if (consume(insn, current))
945  return true;
946 
947  if (current == 0x0f) {
948  LLVM_DEBUG(
949  dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
950  if (consume(insn, current))
951  return true;
952 
953  if (current == 0x38) {
954  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
955  current));
956  if (consume(insn, current))
957  return true;
958 
959  insn->opcodeType = THREEBYTE_38;
960  } else if (current == 0x3a) {
961  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
962  current));
963  if (consume(insn, current))
964  return true;
965 
966  insn->opcodeType = THREEBYTE_3A;
967  } else if (current == 0x0f) {
968  LLVM_DEBUG(
969  dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
970 
971  // Consume operands before the opcode to comply with the 3DNow encoding
972  if (readModRM(insn))
973  return true;
974 
975  if (consume(insn, current))
976  return true;
977 
978  insn->opcodeType = THREEDNOW_MAP;
979  } else {
980  LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
981  insn->opcodeType = TWOBYTE;
982  }
983  } else if (insn->mandatoryPrefix)
984  // The opcode with mandatory prefix must start with opcode escape.
985  // If not it's legacy repeat prefix
986  insn->mandatoryPrefix = 0;
987 
988  // At this point we have consumed the full opcode.
989  // Anything we consume from here on must be unconsumed.
990  insn->opcode = current;
991 
992  return false;
993 }
994 
// Returns true when the name `equiv` is the 16-bit spelling of the 32- or
// 64-bit name `orig`. The two names must have the same length and may only
// differ at positions where `orig` has 'Q'/'L' and `equiv` has 'W', where
// `orig` has '6'/'3' and `equiv` has '1', or where `orig` has '4'/'2' and
// `equiv` has '6'.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool suffixMatches = (w == 'Q' || w == 'L') && n == 'W';
    const bool firstDigitMatches = (w == '6' || w == '3') && n == '1';
    const bool secondDigitMatches = (w == '4' || w == '2') && n == '6';
    if (!(suffixMatches || firstDigitMatches || secondDigitMatches))
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1013 
// Returns true when `name` contains the substring "64", which this file uses
// as the marker for a 64-bit instruction name.
static bool is64Bit(const char *name) {
  for (const char *p = name; *p != '\0'; ++p) {
    // p[1] is safe to read: p[0] is not the terminator here.
    if (p[0] == '6' && p[1] == '4')
      return true;
  }
  return false;
}
1023 
1024 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1025 // for extended and escape opcodes, and using a supplied attribute mask.
1026 static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1027  struct InternalInstruction *insn,
1028  uint16_t attrMask) {
1029  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1030  const ContextDecision *decision;
1031  switch (insn->opcodeType) {
1032  case ONEBYTE:
1033  decision = &ONEBYTE_SYM;
1034  break;
1035  case TWOBYTE:
1036  decision = &TWOBYTE_SYM;
1037  break;
1038  case THREEBYTE_38:
1039  decision = &THREEBYTE38_SYM;
1040  break;
1041  case THREEBYTE_3A:
1042  decision = &THREEBYTE3A_SYM;
1043  break;
1044  case XOP8_MAP:
1045  decision = &XOP8_MAP_SYM;
1046  break;
1047  case XOP9_MAP:
1048  decision = &XOP9_MAP_SYM;
1049  break;
1050  case XOPA_MAP:
1051  decision = &XOPA_MAP_SYM;
1052  break;
1053  case THREEDNOW_MAP:
1054  decision = &THREEDNOW_MAP_SYM;
1055  break;
1056  case MAP5:
1057  decision = &MAP5_SYM;
1058  break;
1059  case MAP6:
1060  decision = &MAP6_SYM;
1061  break;
1062  }
1063 
1064  if (decision->opcodeDecisions[insnCtx]
1065  .modRMDecisions[insn->opcode]
1066  .modrm_type != MODRM_ONEENTRY) {
1067  if (readModRM(insn))
1068  return -1;
1069  *instructionID =
1070  decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1071  } else {
1072  *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1073  }
1074 
1075  return 0;
1076 }
1077 
1078 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1079 // for extended and escape opcodes. Determines the attributes and context for
1080 // the instruction before doing so.
1081 static int getInstructionID(struct InternalInstruction *insn,
1082  const MCInstrInfo *mii) {
1083  uint16_t attrMask;
1084  uint16_t instructionID;
1085 
1086  LLVM_DEBUG(dbgs() << "getID()");
1087 
1088  attrMask = ATTR_NONE;
1089 
1090  if (insn->mode == MODE_64BIT)
1091  attrMask |= ATTR_64BIT;
1092 
1093  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1094  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1095 
1096  if (insn->vectorExtensionType == TYPE_EVEX) {
1097  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1098  case VEX_PREFIX_66:
1099  attrMask |= ATTR_OPSIZE;
1100  break;
1101  case VEX_PREFIX_F3:
1102  attrMask |= ATTR_XS;
1103  break;
1104  case VEX_PREFIX_F2:
1105  attrMask |= ATTR_XD;
1106  break;
1107  }
1108 
1109  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1110  attrMask |= ATTR_EVEXKZ;
1111  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1112  attrMask |= ATTR_EVEXB;
1113  if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1114  attrMask |= ATTR_EVEXK;
1115  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1116  attrMask |= ATTR_VEXL;
1117  if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1118  attrMask |= ATTR_EVEXL2;
1119  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1120  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1121  case VEX_PREFIX_66:
1122  attrMask |= ATTR_OPSIZE;
1123  break;
1124  case VEX_PREFIX_F3:
1125  attrMask |= ATTR_XS;
1126  break;
1127  case VEX_PREFIX_F2:
1128  attrMask |= ATTR_XD;
1129  break;
1130  }
1131 
1132  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1133  attrMask |= ATTR_VEXL;
1134  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1135  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1136  case VEX_PREFIX_66:
1137  attrMask |= ATTR_OPSIZE;
1138  if (insn->hasAdSize)
1139  attrMask |= ATTR_ADSIZE;
1140  break;
1141  case VEX_PREFIX_F3:
1142  attrMask |= ATTR_XS;
1143  break;
1144  case VEX_PREFIX_F2:
1145  attrMask |= ATTR_XD;
1146  break;
1147  }
1148 
1149  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1150  attrMask |= ATTR_VEXL;
1151  } else if (insn->vectorExtensionType == TYPE_XOP) {
1152  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1153  case VEX_PREFIX_66:
1154  attrMask |= ATTR_OPSIZE;
1155  break;
1156  case VEX_PREFIX_F3:
1157  attrMask |= ATTR_XS;
1158  break;
1159  case VEX_PREFIX_F2:
1160  attrMask |= ATTR_XD;
1161  break;
1162  }
1163 
1164  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1165  attrMask |= ATTR_VEXL;
1166  } else {
1167  return -1;
1168  }
1169  } else if (!insn->mandatoryPrefix) {
1170  // If we don't have mandatory prefix we should use legacy prefixes here
1171  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1172  attrMask |= ATTR_OPSIZE;
1173  if (insn->hasAdSize)
1174  attrMask |= ATTR_ADSIZE;
1175  if (insn->opcodeType == ONEBYTE) {
1176  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1177  // Special support for PAUSE
1178  attrMask |= ATTR_XS;
1179  } else {
1180  if (insn->repeatPrefix == 0xf2)
1181  attrMask |= ATTR_XD;
1182  else if (insn->repeatPrefix == 0xf3)
1183  attrMask |= ATTR_XS;
1184  }
1185  } else {
1186  switch (insn->mandatoryPrefix) {
1187  case 0xf2:
1188  attrMask |= ATTR_XD;
1189  break;
1190  case 0xf3:
1191  attrMask |= ATTR_XS;
1192  break;
1193  case 0x66:
1194  if (insn->mode != MODE_16BIT)
1195  attrMask |= ATTR_OPSIZE;
1196  if (insn->hasAdSize)
1197  attrMask |= ATTR_ADSIZE;
1198  break;
1199  case 0x67:
1200  attrMask |= ATTR_ADSIZE;
1201  break;
1202  }
1203  }
1204 
1205  if (insn->rexPrefix & 0x08) {
1206  attrMask |= ATTR_REXW;
1207  attrMask &= ~ATTR_ADSIZE;
1208  }
1209 
1210  if (insn->mode == MODE_16BIT) {
1211  // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1212  // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1213  if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1214  attrMask ^= ATTR_ADSIZE;
1215  // If we're in 16-bit mode and this is one of the relative jumps and opsize
1216  // prefix isn't present, we need to force the opsize attribute since the
1217  // prefix is inverted relative to 32-bit mode.
1218  if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1219  (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1220  attrMask |= ATTR_OPSIZE;
1221 
1222  if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1223  insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1224  attrMask |= ATTR_OPSIZE;
1225  }
1226 
1227 
1228  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1229  return -1;
1230 
1231  // The following clauses compensate for limitations of the tables.
1232 
1233  if (insn->mode != MODE_64BIT &&
1235  // The tables can't distinquish between cases where the W-bit is used to
1236  // select register size and cases where its a required part of the opcode.
1237  if ((insn->vectorExtensionType == TYPE_EVEX &&
1238  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1239  (insn->vectorExtensionType == TYPE_VEX_3B &&
1240  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1241  (insn->vectorExtensionType == TYPE_XOP &&
1242  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1243 
1244  uint16_t instructionIDWithREXW;
1245  if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1246  attrMask | ATTR_REXW)) {
1247  insn->instructionID = instructionID;
1248  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1249  return 0;
1250  }
1251 
1252  auto SpecName = mii->getName(instructionIDWithREXW);
1253  // If not a 64-bit instruction. Switch the opcode.
1254  if (!is64Bit(SpecName.data())) {
1255  insn->instructionID = instructionIDWithREXW;
1256  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1257  return 0;
1258  }
1259  }
1260  }
1261 
1262  // Absolute moves, umonitor, and movdir64b need special handling.
1263  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1264  // inverted w.r.t.
1265  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1266  // any position.
1267  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1268  (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1269  (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1270  // Make sure we observed the prefixes in any position.
1271  if (insn->hasAdSize)
1272  attrMask |= ATTR_ADSIZE;
1273  if (insn->hasOpSize)
1274  attrMask |= ATTR_OPSIZE;
1275 
1276  // In 16-bit, invert the attributes.
1277  if (insn->mode == MODE_16BIT) {
1278  attrMask ^= ATTR_ADSIZE;
1279 
1280  // The OpSize attribute is only valid with the absolute moves.
1281  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1282  attrMask ^= ATTR_OPSIZE;
1283  }
1284 
1285  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1286  return -1;
1287 
1288  insn->instructionID = instructionID;
1289  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1290  return 0;
1291  }
1292 
1293  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1294  !(attrMask & ATTR_OPSIZE)) {
1295  // The instruction tables make no distinction between instructions that
1296  // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1297  // particular spot (i.e., many MMX operations). In general we're
1298  // conservative, but in the specific case where OpSize is present but not in
1299  // the right place we check if there's a 16-bit operation.
1300  const struct InstructionSpecifier *spec;
1301  uint16_t instructionIDWithOpsize;
1302  llvm::StringRef specName, specWithOpSizeName;
1303 
1304  spec = &INSTRUCTIONS_SYM[instructionID];
1305 
1306  if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1307  attrMask | ATTR_OPSIZE)) {
1308  // ModRM required with OpSize but not present. Give up and return the
1309  // version without OpSize set.
1310  insn->instructionID = instructionID;
1311  insn->spec = spec;
1312  return 0;
1313  }
1314 
1315  specName = mii->getName(instructionID);
1316  specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1317 
1318  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1319  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1320  insn->instructionID = instructionIDWithOpsize;
1321  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1322  } else {
1323  insn->instructionID = instructionID;
1324  insn->spec = spec;
1325  }
1326  return 0;
1327  }
1328 
1329  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1330  insn->rexPrefix & 0x01) {
1331  // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1332  // as XCHG %r8, %eax.
1333  const struct InstructionSpecifier *spec;
1334  uint16_t instructionIDWithNewOpcode;
1335  const struct InstructionSpecifier *specWithNewOpcode;
1336 
1337  spec = &INSTRUCTIONS_SYM[instructionID];
1338 
1339  // Borrow opcode from one of the other XCHGar opcodes
1340  insn->opcode = 0x91;
1341 
1342  if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1343  attrMask)) {
1344  insn->opcode = 0x90;
1345 
1346  insn->instructionID = instructionID;
1347  insn->spec = spec;
1348  return 0;
1349  }
1350 
1351  specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1352 
1353  // Change back
1354  insn->opcode = 0x90;
1355 
1356  insn->instructionID = instructionIDWithNewOpcode;
1357  insn->spec = specWithNewOpcode;
1358 
1359  return 0;
1360  }
1361 
1362  insn->instructionID = instructionID;
1363  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1364 
1365  return 0;
1366 }
1367 
1368 // Read an operand from the opcode field of an instruction and interprets it
1369 // appropriately given the operand width. Handles AddRegFrm instructions.
1370 //
1371 // @param insn - the instruction whose opcode field is to be read.
1372 // @param size - The width (in bytes) of the register being specified.
1373 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1374 // RAX.
1375 // @return - 0 on success; nonzero otherwise.
1376 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1377  LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1378 
1379  if (size == 0)
1380  size = insn->registerSize;
1381 
1382  switch (size) {
1383  case 1:
1384  insn->opcodeRegister = (Reg)(
1385  MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1386  if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1387  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1388  insn->opcodeRegister =
1389  (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1390  }
1391 
1392  break;
1393  case 2:
1394  insn->opcodeRegister = (Reg)(
1395  MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1396  break;
1397  case 4:
1398  insn->opcodeRegister =
1399  (Reg)(MODRM_REG_EAX +
1400  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1401  break;
1402  case 8:
1403  insn->opcodeRegister =
1404  (Reg)(MODRM_REG_RAX +
1405  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1406  break;
1407  }
1408 
1409  return 0;
1410 }
1411 
1412 // Consume an immediate operand from an instruction, given the desired operand
1413 // size.
1414 //
1415 // @param insn - The instruction whose operand is to be read.
1416 // @param size - The width (in bytes) of the operand.
1417 // @return - 0 if the immediate was successfully consumed; nonzero
1418 // otherwise.
1419 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1420  uint8_t imm8;
1421  uint16_t imm16;
1422  uint32_t imm32;
1423  uint64_t imm64;
1424 
1425  LLVM_DEBUG(dbgs() << "readImmediate()");
1426 
1427  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1428 
1429  insn->immediateSize = size;
1430  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1431 
1432  switch (size) {
1433  case 1:
1434  if (consume(insn, imm8))
1435  return -1;
1436  insn->immediates[insn->numImmediatesConsumed] = imm8;
1437  break;
1438  case 2:
1439  if (consume(insn, imm16))
1440  return -1;
1441  insn->immediates[insn->numImmediatesConsumed] = imm16;
1442  break;
1443  case 4:
1444  if (consume(insn, imm32))
1445  return -1;
1446  insn->immediates[insn->numImmediatesConsumed] = imm32;
1447  break;
1448  case 8:
1449  if (consume(insn, imm64))
1450  return -1;
1451  insn->immediates[insn->numImmediatesConsumed] = imm64;
1452  break;
1453  default:
1454  llvm_unreachable("invalid size");
1455  }
1456 
1457  insn->numImmediatesConsumed++;
1458 
1459  return 0;
1460 }
1461 
1462 // Consume vvvv from an instruction if it has a VEX prefix.
1463 static int readVVVV(struct InternalInstruction *insn) {
1464  LLVM_DEBUG(dbgs() << "readVVVV()");
1465 
1466  int vvvv;
1467  if (insn->vectorExtensionType == TYPE_EVEX)
1468  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1470  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1471  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1472  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1473  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1474  else if (insn->vectorExtensionType == TYPE_XOP)
1475  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1476  else
1477  return -1;
1478 
1479  if (insn->mode != MODE_64BIT)
1480  vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1481 
1482  insn->vvvv = static_cast<Reg>(vvvv);
1483  return 0;
1484 }
1485 
1486 // Read an mask register from the opcode field of an instruction.
1487 //
1488 // @param insn - The instruction whose opcode field is to be read.
1489 // @return - 0 on success; nonzero otherwise.
1490 static int readMaskRegister(struct InternalInstruction *insn) {
1491  LLVM_DEBUG(dbgs() << "readMaskRegister()");
1492 
1493  if (insn->vectorExtensionType != TYPE_EVEX)
1494  return -1;
1495 
1496  insn->writemask =
1497  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1498  return 0;
1499 }
1500 
1501 // Consults the specifier for an instruction and consumes all
1502 // operands for that instruction, interpreting them as it goes.
1503 static int readOperands(struct InternalInstruction *insn) {
1504  int hasVVVV, needVVVV;
1505  int sawRegImm = 0;
1506 
1507  LLVM_DEBUG(dbgs() << "readOperands()");
1508 
1509  // If non-zero vvvv specified, make sure one of the operands uses it.
1510  hasVVVV = !readVVVV(insn);
1511  needVVVV = hasVVVV && (insn->vvvv != 0);
1512 
1513  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1514  switch (Op.encoding) {
1515  case ENCODING_NONE:
1516  case ENCODING_SI:
1517  case ENCODING_DI:
1518  break;
1520  // VSIB can use the V2 bit so check only the other bits.
1521  if (needVVVV)
1522  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1523  if (readModRM(insn))
1524  return -1;
1525 
1526  // Reject if SIB wasn't used.
1527  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1528  return -1;
1529 
1530  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1531  if (insn->sibIndex == SIB_INDEX_NONE)
1532  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1533 
1534  // If EVEX.v2 is set this is one of the 16-31 registers.
1535  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1537  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1538 
1539  // Adjust the index register to the correct size.
1540  switch ((OperandType)Op.type) {
1541  default:
1542  debug("Unhandled VSIB index type");
1543  return -1;
1544  case TYPE_MVSIBX:
1545  insn->sibIndex =
1546  (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1547  break;
1548  case TYPE_MVSIBY:
1549  insn->sibIndex =
1550  (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1551  break;
1552  case TYPE_MVSIBZ:
1553  insn->sibIndex =
1554  (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1555  break;
1556  }
1557 
1558  // Apply the AVX512 compressed displacement scaling factor.
1559  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1560  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1561  break;
1562  case ENCODING_SIB:
1563  // Reject if SIB wasn't used.
1564  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1565  return -1;
1566  if (readModRM(insn))
1567  return -1;
1568  if (fixupReg(insn, &Op))
1569  return -1;
1570  break;
1571  case ENCODING_REG:
1573  if (readModRM(insn))
1574  return -1;
1575  if (fixupReg(insn, &Op))
1576  return -1;
1577  // Apply the AVX512 compressed displacement scaling factor.
1578  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1579  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1580  break;
1581  case ENCODING_IB:
1582  if (sawRegImm) {
1583  // Saw a register immediate so don't read again and instead split the
1584  // previous immediate. FIXME: This is a hack.
1585  insn->immediates[insn->numImmediatesConsumed] =
1586  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1587  ++insn->numImmediatesConsumed;
1588  break;
1589  }
1590  if (readImmediate(insn, 1))
1591  return -1;
1592  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1593  sawRegImm = 1;
1594  break;
1595  case ENCODING_IW:
1596  if (readImmediate(insn, 2))
1597  return -1;
1598  break;
1599  case ENCODING_ID:
1600  if (readImmediate(insn, 4))
1601  return -1;
1602  break;
1603  case ENCODING_IO:
1604  if (readImmediate(insn, 8))
1605  return -1;
1606  break;
1607  case ENCODING_Iv:
1608  if (readImmediate(insn, insn->immediateSize))
1609  return -1;
1610  break;
1611  case ENCODING_Ia:
1612  if (readImmediate(insn, insn->addressSize))
1613  return -1;
1614  break;
1615  case ENCODING_IRC:
1616  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1618  break;
1619  case ENCODING_RB:
1620  if (readOpcodeRegister(insn, 1))
1621  return -1;
1622  break;
1623  case ENCODING_RW:
1624  if (readOpcodeRegister(insn, 2))
1625  return -1;
1626  break;
1627  case ENCODING_RD:
1628  if (readOpcodeRegister(insn, 4))
1629  return -1;
1630  break;
1631  case ENCODING_RO:
1632  if (readOpcodeRegister(insn, 8))
1633  return -1;
1634  break;
1635  case ENCODING_Rv:
1636  if (readOpcodeRegister(insn, 0))
1637  return -1;
1638  break;
1639  case ENCODING_CC:
1640  insn->immediates[1] = insn->opcode & 0xf;
1641  break;
1642  case ENCODING_FP:
1643  break;
1644  case ENCODING_VVVV:
1645  needVVVV = 0; // Mark that we have found a VVVV operand.
1646  if (!hasVVVV)
1647  return -1;
1648  if (insn->mode != MODE_64BIT)
1649  insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1650  if (fixupReg(insn, &Op))
1651  return -1;
1652  break;
1653  case ENCODING_WRITEMASK:
1654  if (readMaskRegister(insn))
1655  return -1;
1656  break;
1657  case ENCODING_DUP:
1658  break;
1659  default:
1660  LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1661  return -1;
1662  }
1663  }
1664 
1665  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1666  if (needVVVV)
1667  return -1;
1668 
1669  return 0;
1670 }
1671 
namespace llvm {
namespace X86 {

// Placeholder register numbers that exist only so an automatically-generated
// switch statement compiles; these values are never actually assigned.
enum { BX_SI = 500, BX_DI, BP_SI, BP_DI, sib, sib64 };

} // namespace X86
} // namespace llvm
1689 
1690 static bool translateInstruction(MCInst &target,
1691  InternalInstruction &source,
1692  const MCDisassembler *Dis);
1693 
1694 namespace {
1695 
1696 /// Generic disassembler for all X86 platforms. All each platform class should
1697 /// have to do is subclass the constructor, and provide a different
1698 /// disassemblerMode value.
1699 class X86GenericDisassembler : public MCDisassembler {
1700  std::unique_ptr<const MCInstrInfo> MII;
1701 public:
1702  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1703  std::unique_ptr<const MCInstrInfo> MII);
1704 public:
1705  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1706  ArrayRef<uint8_t> Bytes, uint64_t Address,
1707  raw_ostream &cStream) const override;
1708 
1709 private:
1710  DisassemblerMode fMode;
1711 };
1712 
1713 } // namespace
1714 
1715 X86GenericDisassembler::X86GenericDisassembler(
1716  const MCSubtargetInfo &STI,
1717  MCContext &Ctx,
1718  std::unique_ptr<const MCInstrInfo> MII)
1719  : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1720  const FeatureBitset &FB = STI.getFeatureBits();
1721  if (FB[X86::Is16Bit]) {
1722  fMode = MODE_16BIT;
1723  return;
1724  } else if (FB[X86::Is32Bit]) {
1725  fMode = MODE_32BIT;
1726  return;
1727  } else if (FB[X86::Is64Bit]) {
1728  fMode = MODE_64BIT;
1729  return;
1730  }
1731 
1732  llvm_unreachable("Invalid CPU mode");
1733 }
1734 
1735 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1736  MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1737  raw_ostream &CStream) const {
1738  CommentStream = &CStream;
1739 
1741  memset(&Insn, 0, sizeof(InternalInstruction));
1742  Insn.bytes = Bytes;
1743  Insn.startLocation = Address;
1744  Insn.readerCursor = Address;
1745  Insn.mode = fMode;
1746 
1747  if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1748  getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1749  readOperands(&Insn)) {
1750  Size = Insn.readerCursor - Address;
1751  return Fail;
1752  }
1753 
1754  Insn.operands = x86OperandSets[Insn.spec->operands];
1755  Insn.length = Insn.readerCursor - Insn.startLocation;
1756  Size = Insn.length;
1757  if (Size > 15)
1758  LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1759 
1760  bool Ret = translateInstruction(Instr, Insn, this);
1761  if (!Ret) {
1762  unsigned Flags = X86::IP_NO_PREFIX;
1763  if (Insn.hasAdSize)
1765  if (!Insn.mandatoryPrefix) {
1766  if (Insn.hasOpSize)
1768  if (Insn.repeatPrefix == 0xf2)
1770  else if (Insn.repeatPrefix == 0xf3 &&
1771  // It should not be 'pause' f3 90
1772  Insn.opcode != 0x90)
1774  if (Insn.hasLockPrefix)
1776  }
1777  Instr.setFlags(Flags);
1778  }
1779  return (!Ret) ? Success : Fail;
1780 }
1781 
1782 //
1783 // Private code that translates from struct InternalInstructions to MCInsts.
1784 //
1785 
1786 /// translateRegister - Translates an internal register to the appropriate LLVM
1787 /// register, and appends it as an operand to an MCInst.
1788 ///
1789 /// @param mcInst - The MCInst to append to.
1790 /// @param reg - The Reg to append.
1791 static void translateRegister(MCInst &mcInst, Reg reg) {
1792 #define ENTRY(x) X86::x,
1793  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1794 #undef ENTRY
1795 
1796  MCPhysReg llvmRegnum = llvmRegnums[reg];
1797  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1798 }
1799 
1800 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1801  0, // SEG_OVERRIDE_NONE
1802  X86::CS,
1803  X86::SS,
1804  X86::DS,
1805  X86::ES,
1806  X86::FS,
1807  X86::GS
1808 };
1809 
1810 /// translateSrcIndex - Appends a source index operand to an MCInst.
1811 ///
1812 /// @param mcInst - The MCInst to append to.
1813 /// @param insn - The internal instruction.
1814 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1815  unsigned baseRegNo;
1816 
1817  if (insn.mode == MODE_64BIT)
1818  baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1819  else if (insn.mode == MODE_32BIT)
1820  baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1821  else {
1822  assert(insn.mode == MODE_16BIT);
1823  baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1824  }
1825  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1826  mcInst.addOperand(baseReg);
1827 
1828  MCOperand segmentReg;
1830  mcInst.addOperand(segmentReg);
1831  return false;
1832 }
1833 
1834 /// translateDstIndex - Appends a destination index operand to an MCInst.
1835 ///
1836 /// @param mcInst - The MCInst to append to.
1837 /// @param insn - The internal instruction.
1838 
1839 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1840  unsigned baseRegNo;
1841 
1842  if (insn.mode == MODE_64BIT)
1843  baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1844  else if (insn.mode == MODE_32BIT)
1845  baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1846  else {
1847  assert(insn.mode == MODE_16BIT);
1848  baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1849  }
1850  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1851  mcInst.addOperand(baseReg);
1852  return false;
1853 }
1854 
1855 /// translateImmediate - Appends an immediate operand to an MCInst.
1856 ///
1857 /// @param mcInst - The MCInst to append to.
1858 /// @param immediate - The immediate value to append.
1859 /// @param operand - The operand, as stored in the descriptor table.
1860 /// @param insn - The internal instruction.
1861 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1862  const OperandSpecifier &operand,
1863  InternalInstruction &insn,
1864  const MCDisassembler *Dis) {
1865  // Sign-extend the immediate if necessary.
1866 
1867  OperandType type = (OperandType)operand.type;
1868 
1869  bool isBranch = false;
1870  uint64_t pcrel = 0;
1871  if (type == TYPE_REL) {
1872  isBranch = true;
1873  pcrel = insn.startLocation + insn.length;
1874  switch (operand.encoding) {
1875  default:
1876  break;
1877  case ENCODING_Iv:
1878  switch (insn.displacementSize) {
1879  default:
1880  break;
1881  case 1:
1882  if(immediate & 0x80)
1883  immediate |= ~(0xffull);
1884  break;
1885  case 2:
1886  if(immediate & 0x8000)
1887  immediate |= ~(0xffffull);
1888  break;
1889  case 4:
1890  if(immediate & 0x80000000)
1891  immediate |= ~(0xffffffffull);
1892  break;
1893  case 8:
1894  break;
1895  }
1896  break;
1897  case ENCODING_IB:
1898  if(immediate & 0x80)
1899  immediate |= ~(0xffull);
1900  break;
1901  case ENCODING_IW:
1902  if(immediate & 0x8000)
1903  immediate |= ~(0xffffull);
1904  break;
1905  case ENCODING_ID:
1906  if(immediate & 0x80000000)
1907  immediate |= ~(0xffffffffull);
1908  break;
1909  }
1910  }
1911  // By default sign-extend all X86 immediates based on their encoding.
1912  else if (type == TYPE_IMM) {
1913  switch (operand.encoding) {
1914  default:
1915  break;
1916  case ENCODING_IB:
1917  if(immediate & 0x80)
1918  immediate |= ~(0xffull);
1919  break;
1920  case ENCODING_IW:
1921  if(immediate & 0x8000)
1922  immediate |= ~(0xffffull);
1923  break;
1924  case ENCODING_ID:
1925  if(immediate & 0x80000000)
1926  immediate |= ~(0xffffffffull);
1927  break;
1928  case ENCODING_IO:
1929  break;
1930  }
1931  }
1932 
1933  switch (type) {
1934  case TYPE_XMM:
1935  mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
1936  return;
1937  case TYPE_YMM:
1938  mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
1939  return;
1940  case TYPE_ZMM:
1941  mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
1942  return;
1943  default:
1944  // operand is 64 bits wide. Do nothing.
1945  break;
1946  }
1947 
1948  if (!Dis->tryAddingSymbolicOperand(
1949  mcInst, immediate + pcrel, insn.startLocation, isBranch,
1950  insn.immediateOffset, insn.immediateSize, insn.length))
1951  mcInst.addOperand(MCOperand::createImm(immediate));
1952 
1953  if (type == TYPE_MOFFS) {
1954  MCOperand segmentReg;
1956  mcInst.addOperand(segmentReg);
1957  }
1958 }
1959 
1960 /// translateRMRegister - Translates a register stored in the R/M field of the
1961 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
1962 /// @param mcInst - The MCInst to append to.
1963 /// @param insn - The internal instruction to extract the R/M field
1964 /// from.
1965 /// @return - 0 on success; -1 otherwise
1966 static bool translateRMRegister(MCInst &mcInst,
1967  InternalInstruction &insn) {
1968  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
1969  debug("A R/M register operand may not have a SIB byte");
1970  return true;
1971  }
1972 
1973  switch (insn.eaBase) {
1974  default:
1975  debug("Unexpected EA base register");
1976  return true;
1977  case EA_BASE_NONE:
1978  debug("EA_BASE_NONE for ModR/M base");
1979  return true;
1980 #define ENTRY(x) case EA_BASE_##x:
1981  ALL_EA_BASES
1982 #undef ENTRY
1983  debug("A R/M register operand may not have a base; "
1984  "the operand must be a register.");
1985  return true;
1986 #define ENTRY(x) \
1987  case EA_REG_##x: \
1988  mcInst.addOperand(MCOperand::createReg(X86::x)); break;
1989  ALL_REGS
1990 #undef ENTRY
1991  }
1992 
1993  return false;
1994 }
1995 
1996 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
1997 /// fields of an internal instruction (and possibly its SIB byte) to a memory
1998 /// operand in LLVM's format, and appends it to an MCInst.
1999 ///
2000 /// @param mcInst - The MCInst to append to.
2001 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2002 /// from.
2003 /// @param ForceSIB - The instruction must use SIB.
2004 /// @return - 0 on success; nonzero otherwise
2005 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2006  const MCDisassembler *Dis,
2007  bool ForceSIB = false) {
2008  // Addresses in an MCInst are represented as five operands:
2009  // 1. basereg (register) The R/M base, or (if there is a SIB) the
2010  // SIB base
2011  // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2012  // scale amount
2013  // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2014  // the index (which is multiplied by the
2015  // scale amount)
2016  // 4. displacement (immediate) 0, or the displacement if there is one
2017  // 5. segmentreg (register) x86_registerNONE for now, but could be set
2018  // if we have segment overrides
2019 
2020  MCOperand baseReg;
2021  MCOperand scaleAmount;
2022  MCOperand indexReg;
2023  MCOperand displacement;
2024  MCOperand segmentReg;
2025  uint64_t pcrel = 0;
2026 
2027  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2028  if (insn.sibBase != SIB_BASE_NONE) {
2029  switch (insn.sibBase) {
2030  default:
2031  debug("Unexpected sibBase");
2032  return true;
2033 #define ENTRY(x) \
2034  case SIB_BASE_##x: \
2035  baseReg = MCOperand::createReg(X86::x); break;
2037 #undef ENTRY
2038  }
2039  } else {
2040  baseReg = MCOperand::createReg(X86::NoRegister);
2041  }
2042 
2043  if (insn.sibIndex != SIB_INDEX_NONE) {
2044  switch (insn.sibIndex) {
2045  default:
2046  debug("Unexpected sibIndex");
2047  return true;
2048 #define ENTRY(x) \
2049  case SIB_INDEX_##x: \
2050  indexReg = MCOperand::createReg(X86::x); break;
2053  REGS_XMM
2054  REGS_YMM
2055  REGS_ZMM
2056 #undef ENTRY
2057  }
2058  } else {
2059  // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2060  // but no index is used and modrm alone should have been enough.
2061  // -No base register in 32-bit mode. In 64-bit mode this is used to
2062  // avoid rip-relative addressing.
2063  // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2064  // base always requires a SIB byte.
2065  // -A scale other than 1 is used.
2066  if (!ForceSIB &&
2067  (insn.sibScale != 1 ||
2068  (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2069  (insn.sibBase != SIB_BASE_NONE &&
2070  insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2071  insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2072  indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2073  X86::RIZ);
2074  } else
2075  indexReg = MCOperand::createReg(X86::NoRegister);
2076  }
2077 
2078  scaleAmount = MCOperand::createImm(insn.sibScale);
2079  } else {
2080  switch (insn.eaBase) {
2081  case EA_BASE_NONE:
2082  if (insn.eaDisplacement == EA_DISP_NONE) {
2083  debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2084  return true;
2085  }
2086  if (insn.mode == MODE_64BIT){
2087  pcrel = insn.startLocation + insn.length;
2089  insn.startLocation +
2090  insn.displacementOffset);
2091  // Section 2.2.1.6
2092  baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2093  X86::RIP);
2094  }
2095  else
2096  baseReg = MCOperand::createReg(X86::NoRegister);
2097 
2098  indexReg = MCOperand::createReg(X86::NoRegister);
2099  break;
2100  case EA_BASE_BX_SI:
2101  baseReg = MCOperand::createReg(X86::BX);
2102  indexReg = MCOperand::createReg(X86::SI);
2103  break;
2104  case EA_BASE_BX_DI:
2105  baseReg = MCOperand::createReg(X86::BX);
2106  indexReg = MCOperand::createReg(X86::DI);
2107  break;
2108  case EA_BASE_BP_SI:
2109  baseReg = MCOperand::createReg(X86::BP);
2110  indexReg = MCOperand::createReg(X86::SI);
2111  break;
2112  case EA_BASE_BP_DI:
2113  baseReg = MCOperand::createReg(X86::BP);
2114  indexReg = MCOperand::createReg(X86::DI);
2115  break;
2116  default:
2117  indexReg = MCOperand::createReg(X86::NoRegister);
2118  switch (insn.eaBase) {
2119  default:
2120  debug("Unexpected eaBase");
2121  return true;
2122  // Here, we will use the fill-ins defined above. However,
2123  // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2124  // sib and sib64 were handled in the top-level if, so they're only
2125  // placeholders to keep the compiler happy.
2126 #define ENTRY(x) \
2127  case EA_BASE_##x: \
2128  baseReg = MCOperand::createReg(X86::x); break;
2129  ALL_EA_BASES
2130 #undef ENTRY
2131 #define ENTRY(x) case EA_REG_##x:
2132  ALL_REGS
2133 #undef ENTRY
2134  debug("A R/M memory operand may not be a register; "
2135  "the base field must be a base.");
2136  return true;
2137  }
2138  }
2139 
2140  scaleAmount = MCOperand::createImm(1);
2141  }
2142 
2143  displacement = MCOperand::createImm(insn.displacement);
2144 
2146 
2147  mcInst.addOperand(baseReg);
2148  mcInst.addOperand(scaleAmount);
2149  mcInst.addOperand(indexReg);
2150 
2151  const uint8_t dispSize =
2152  (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
2153 
2155  mcInst, insn.displacement + pcrel, insn.startLocation, false,
2156  insn.displacementOffset, dispSize, insn.length))
2157  mcInst.addOperand(displacement);
2158  mcInst.addOperand(segmentReg);
2159  return false;
2160 }
2161 
2162 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2163 /// byte of an instruction to LLVM form, and appends it to an MCInst.
2164 ///
2165 /// @param mcInst - The MCInst to append to.
2166 /// @param operand - The operand, as stored in the descriptor table.
2167 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2168 /// from.
2169 /// @return - 0 on success; nonzero otherwise
2170 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2171  InternalInstruction &insn, const MCDisassembler *Dis) {
2172  switch (operand.type) {
2173  default:
2174  debug("Unexpected type for a R/M operand");
2175  return true;
2176  case TYPE_R8:
2177  case TYPE_R16:
2178  case TYPE_R32:
2179  case TYPE_R64:
2180  case TYPE_Rv:
2181  case TYPE_MM64:
2182  case TYPE_XMM:
2183  case TYPE_YMM:
2184  case TYPE_ZMM:
2185  case TYPE_TMM:
2186  case TYPE_VK_PAIR:
2187  case TYPE_VK:
2188  case TYPE_DEBUGREG:
2189  case TYPE_CONTROLREG:
2190  case TYPE_BNDR:
2191  return translateRMRegister(mcInst, insn);
2192  case TYPE_M:
2193  case TYPE_MVSIBX:
2194  case TYPE_MVSIBY:
2195  case TYPE_MVSIBZ:
2196  return translateRMMemory(mcInst, insn, Dis);
2197  case TYPE_MSIB:
2198  return translateRMMemory(mcInst, insn, Dis, true);
2199  }
2200 }
2201 
2202 /// translateFPRegister - Translates a stack position on the FPU stack to its
2203 /// LLVM form, and appends it to an MCInst.
2204 ///
2205 /// @param mcInst - The MCInst to append to.
2206 /// @param stackPos - The stack position to translate.
2207 static void translateFPRegister(MCInst &mcInst,
2208  uint8_t stackPos) {
2209  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2210 }
2211 
2212 /// translateMaskRegister - Translates a 3-bit mask register number to
2213 /// LLVM form, and appends it to an MCInst.
2214 ///
2215 /// @param mcInst - The MCInst to append to.
2216 /// @param maskRegNum - Number of mask register from 0 to 7.
2217 /// @return - false on success; true otherwise.
2218 static bool translateMaskRegister(MCInst &mcInst,
2219  uint8_t maskRegNum) {
2220  if (maskRegNum >= 8) {
2221  debug("Invalid mask register number");
2222  return true;
2223  }
2224 
2225  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2226  return false;
2227 }
2228 
2229 /// translateOperand - Translates an operand stored in an internal instruction
2230 /// to LLVM's format and appends it to an MCInst.
2231 ///
2232 /// @param mcInst - The MCInst to append to.
2233 /// @param operand - The operand, as stored in the descriptor table.
2234 /// @param insn - The internal instruction.
2235 /// @return - false on success; true otherwise.
2236 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2237  InternalInstruction &insn,
2238  const MCDisassembler *Dis) {
2239  switch (operand.encoding) {
2240  default:
2241  debug("Unhandled operand encoding during translation");
2242  return true;
2243  case ENCODING_REG:
2244  translateRegister(mcInst, insn.reg);
2245  return false;
2246  case ENCODING_WRITEMASK:
2247  return translateMaskRegister(mcInst, insn.writemask);
2248  case ENCODING_SIB:
2251  return translateRM(mcInst, operand, insn, Dis);
2252  case ENCODING_IB:
2253  case ENCODING_IW:
2254  case ENCODING_ID:
2255  case ENCODING_IO:
2256  case ENCODING_Iv:
2257  case ENCODING_Ia:
2258  translateImmediate(mcInst,
2259  insn.immediates[insn.numImmediatesTranslated++],
2260  operand,
2261  insn,
2262  Dis);
2263  return false;
2264  case ENCODING_IRC:
2265  mcInst.addOperand(MCOperand::createImm(insn.RC));
2266  return false;
2267  case ENCODING_SI:
2268  return translateSrcIndex(mcInst, insn);
2269  case ENCODING_DI:
2270  return translateDstIndex(mcInst, insn);
2271  case ENCODING_RB:
2272  case ENCODING_RW:
2273  case ENCODING_RD:
2274  case ENCODING_RO:
2275  case ENCODING_Rv:
2276  translateRegister(mcInst, insn.opcodeRegister);
2277  return false;
2278  case ENCODING_CC:
2279  mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2280  return false;
2281  case ENCODING_FP:
2282  translateFPRegister(mcInst, insn.modRM & 7);
2283  return false;
2284  case ENCODING_VVVV:
2285  translateRegister(mcInst, insn.vvvv);
2286  return false;
2287  case ENCODING_DUP:
2288  return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2289  insn, Dis);
2290  }
2291 }
2292 
2293 /// translateInstruction - Translates an internal instruction and all its
2294 /// operands to an MCInst.
2295 ///
2296 /// @param mcInst - The MCInst to populate with the instruction's data.
2297 /// @param insn - The internal instruction.
2298 /// @return - false on success; true otherwise.
2299 static bool translateInstruction(MCInst &mcInst,
2300  InternalInstruction &insn,
2301  const MCDisassembler *Dis) {
2302  if (!insn.spec) {
2303  debug("Instruction has no specification");
2304  return true;
2305  }
2306 
2307  mcInst.clear();
2308  mcInst.setOpcode(insn.instructionID);
2309  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2310  // prefix bytes should be disassembled as xrelease and xacquire then set the
2311  // opcode to those instead of the rep and repne opcodes.
2312  if (insn.xAcquireRelease) {
2313  if(mcInst.getOpcode() == X86::REP_PREFIX)
2314  mcInst.setOpcode(X86::XRELEASE_PREFIX);
2315  else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2316  mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2317  }
2318 
2319  insn.numImmediatesTranslated = 0;
2320 
2321  for (const auto &Op : insn.operands) {
2322  if (Op.encoding != ENCODING_NONE) {
2323  if (translateOperand(mcInst, Op, insn, Dis)) {
2324  return true;
2325  }
2326  }
2327  }
2328 
2329  return false;
2330 }
2331 
2333  const MCSubtargetInfo &STI,
2334  MCContext &Ctx) {
2335  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2336  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2337 }
2338 
2340  // Register the disassembler.
2345 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
llvm::X86Disassembler::MODE_16BIT
@ MODE_16BIT
Definition: X86DisassemblerDecoderCommon.h:468
i
i
Definition: README.txt:29
byte
SSE Variable shift can be custom lowered to something like which uses a small table unaligned load shuffle instead of going through memory byte
Definition: README-SSE.txt:11
llvm::X86Disassembler::InternalInstruction::xAcquireRelease
bool xAcquireRelease
Definition: X86DisassemblerDecoder.h:547
vvvvFromVEX3of3
#define vvvvFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:56
llvm::X86Disassembler::InternalInstruction::vectorExtensionType
VectorExtensionType vectorExtensionType
Definition: X86DisassemblerDecoder.h:541
bFromXOP2of3
#define bFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:67
baseFromSIB
#define baseFromSIB(sib)
Definition: X86DisassemblerDecoder.h:30
llvm::X86Disassembler::MODE_64BIT
@ MODE_64BIT
Definition: X86DisassemblerDecoderCommon.h:470
llvm::X86Disassembler::InternalInstruction::hasLockPrefix
bool hasLockPrefix
Definition: X86DisassemblerDecoder.h:554
llvm::X86Disassembler::InternalInstruction::displacement
int32_t displacement
Definition: X86DisassemblerDecoder.h:603
xFromXOP2of3
#define xFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:66
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
THREEDNOW_MAP_SYM
#define THREEDNOW_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:33
lFromVEX2of2
#define lFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:62
llvm::X86Disassembler::TYPE_NO_VEX_XOP
@ TYPE_NO_VEX_XOP
Definition: X86DisassemblerDecoder.h:505
wFromVEX3of3
#define wFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:55
rFromEVEX2of4
#define rFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:36
llvm::X86Disassembler::VEX_LOB_0F3A
@ VEX_LOB_0F3A
Definition: X86DisassemblerDecoder.h:485
llvm::X86Disassembler::SEG_OVERRIDE_GS
@ SEG_OVERRIDE_GS
Definition: X86DisassemblerDecoder.h:477
OpcodeDecision
Definition: X86Disassembler.cpp:108
OpcodeDecision::modRMDecisions
ModRMDecision modRMDecisions[256]
Definition: X86Disassembler.cpp:109
ppFromXOP3of3
#define ppFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:72
MCDisassembler.h
llvm::X86::BP_SI
@ BP_SI
Definition: X86Disassembler.cpp:1681
debug
#define debug(s)
Definition: X86Disassembler.cpp:96
T
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:76
llvm::X86Disassembler::ATTR_REXW
@ ATTR_REXW
Definition: X86DisassemblerDecoderCommon.h:58
llvm::X86::IP_HAS_OP_SIZE
@ IP_HAS_OP_SIZE
Definition: X86BaseInfo.h:58
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1015
CASE_ENCODING_RM
#define CASE_ENCODING_RM
Definition: X86DisassemblerDecoderCommon.h:341
llvm::X86Disassembler::TYPE_EVEX
@ TYPE_EVEX
Definition: X86DisassemblerDecoder.h:508
llvm::X86Disassembler::InstructionSpecifier::operands
uint16_t operands
Definition: X86DisassemblerDecoder.h:515
llvm::X86Disassembler::SEG_OVERRIDE_SS
@ SEG_OVERRIDE_SS
Definition: X86DisassemblerDecoder.h:473
name
static const char * name
Definition: SMEABIPass.cpp:49
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
op
#define op(i)
translateImmediate
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
Definition: X86Disassembler.cpp:1861
isREX
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Definition: X86Disassembler.cpp:202
mmmmmFromVEX2of3
#define mmmmmFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:54
ModRMDecision::instructionIDs
uint16_t instructionIDs
Definition: X86Disassembler.cpp:103
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
ppFromVEX2of2
#define ppFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:63
llvm::X86Disassembler::VEX_LOB_0F38
@ VEX_LOB_0F38
Definition: X86DisassemblerDecoder.h:484
Fail
#define Fail
Definition: AArch64Disassembler.cpp:301
llvm::X86Disassembler::InternalInstruction::length
size_t length
Definition: X86DisassemblerDecoder.h:532
llvm::X86Disassembler::InternalInstruction::consumedModRM
bool consumedModRM
Definition: X86DisassemblerDecoder.h:596
llvm::MCDisassembler::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
Definition: MCDisassembler.cpp:28
llvm::X86Disassembler::InternalInstruction::segmentOverride
SegmentOverride segmentOverride
Definition: X86DisassemblerDecoder.h:545
llvm::X86Disassembler::SEG_OVERRIDE_FS
@ SEG_OVERRIDE_FS
Definition: X86DisassemblerDecoder.h:476
modFromModRM
#define modFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:25
scaleFromSIB
#define scaleFromSIB(sib)
Definition: X86DisassemblerDecoder.h:28
llvm::X86AS::FS
@ FS
Definition: X86.h:201
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
llvm::X86Disassembler::InternalInstruction::opcodeType
OpcodeType opcodeType
Definition: X86DisassemblerDecoder.h:577
readModRM
static int readModRM(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:603
translateFPRegister
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
Definition: X86Disassembler.cpp:2207
nextByte
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
Definition: WebAssemblyDisassembler.cpp:75
createX86Disassembler
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Definition: X86Disassembler.cpp:2332
llvm::X86Disassembler::EA_BASE_NONE
@ EA_BASE_NONE
Definition: X86DisassemblerDecoder.h:419
llvm::X86Disassembler::TYPE_XOP
@ TYPE_XOP
Definition: X86DisassemblerDecoder.h:509
llvm::X86Disassembler::InternalInstruction::sibIndex
SIBIndex sibIndex
Definition: X86DisassemblerDecoder.h:629
llvm::X86Disassembler::XOP_MAP_SELECT_8
@ XOP_MAP_SELECT_8
Definition: X86DisassemblerDecoder.h:491
llvm::TargetRegistry::RegisterMCDisassembler
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
Definition: TargetRegistry.h:972
rFromXOP2of3
#define rFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:65
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:41
llvm::X86Disassembler::MODE_32BIT
@ MODE_32BIT
Definition: X86DisassemblerDecoderCommon.h:469
X86DisassemblerDecoder.h
rFromVEX2of3
#define rFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:51
MAP6_SYM
#define MAP6_SYM
Definition: X86DisassemblerDecoderCommon.h:35
ModRMDecision
Definition: X86Disassembler.cpp:101
llvm::X86::BX_DI
@ BX_DI
Definition: X86Disassembler.cpp:1680
translateSrcIndex
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Definition: X86Disassembler.cpp:1814
vvvvFromVEX2of2
#define vvvvFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:61
mmmFromEVEX2of4
#define mmmFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:40
llvm::X86Disassembler::ATTR_EVEX
@ ATTR_EVEX
Definition: X86DisassemblerDecoderCommon.h:63
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
llvm::X86Disassembler::ATTR_EVEXK
@ ATTR_EVEXK
Definition: X86DisassemblerDecoderCommon.h:65
MAP5_SYM
#define MAP5_SYM
Definition: X86DisassemblerDecoderCommon.h:34
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
fixupReg
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Definition: X86Disassembler.cpp:834
Format.h
ALL_SIB_BASES
#define ALL_SIB_BASES
Definition: X86DisassemblerDecoder.h:393
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
rFromVEX2of2
#define rFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:60
llvm::X86Disassembler::InternalInstruction::opcode
uint8_t opcode
Definition: X86DisassemblerDecoder.h:572
llvm::X86Disassembler::InternalInstruction::numImmediatesTranslated
uint8_t numImmediatesTranslated
Definition: X86DisassemblerDecoder.h:607
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
translateRMRegister
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
Definition: X86Disassembler.cpp:1966
peek
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Definition: X86Disassembler.cpp:184
llvm::X86Disassembler::ATTR_XS
@ ATTR_XS
Definition: X86DisassemblerDecoderCommon.h:56
bFromREX
#define bFromREX(rex)
Definition: X86DisassemblerDecoder.h:34
getInstructionIDWithAttrMask
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
Definition: X86Disassembler.cpp:1026
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::X86Disassembler::InternalInstruction::opcodeRegister
Reg opcodeRegister
Definition: X86DisassemblerDecoder.h:611
llvm::X86Disassembler::InstructionContext
InstructionContext
Definition: X86DisassemblerDecoderCommon.h:281
llvm::X86Disassembler::EA_DISP_32
@ EA_DISP_32
Definition: X86DisassemblerDecoder.h:458
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
xFromREX
#define xFromREX(rex)
Definition: X86DisassemblerDecoder.h:33
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
EA_BASES_64BIT
#define EA_BASES_64BIT
Definition: X86DisassemblerDecoder.h:169
llvm::X86Disassembler::SEG_OVERRIDE_CS
@ SEG_OVERRIDE_CS
Definition: X86DisassemblerDecoder.h:472
r2FromEVEX2of4
#define r2FromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:39
llvm::X86Disassembler::ATTR_EVEXL2
@ ATTR_EVEXL2
Definition: X86DisassemblerDecoderCommon.h:64
readSIB
static int readSIB(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:510
llvm::X86::sib64
@ sib64
Definition: X86Disassembler.cpp:1684
llvm::X86Disassembler::SIB_BASE_NONE
@ SIB_BASE_NONE
Definition: X86DisassemblerDecoder.h:446
llvm::X86Disassembler::MAP5
@ MAP5
Definition: X86DisassemblerDecoderCommon.h:298
llvm::X86Disassembler::XOP_MAP_SELECT_A
@ XOP_MAP_SELECT_A
Definition: X86DisassemblerDecoder.h:493
lFromVEX3of3
#define lFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:57
llvm::X86Disassembler::THREEBYTE_38
@ THREEBYTE_38
Definition: X86DisassemblerDecoderCommon.h:292
llvm::X86Disassembler::VEX_LOB_0F
@ VEX_LOB_0F
Definition: X86DisassemblerDecoder.h:483
wFromEVEX3of4
#define wFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:41
llvm::X86Disassembler::InternalInstruction
The x86 internal instruction, which is produced by the decoder.
Definition: X86DisassemblerDecoder.h:519
llvm::X86Disassembler::SEG_OVERRIDE_ES
@ SEG_OVERRIDE_ES
Definition: X86DisassemblerDecoder.h:475
llvm::X86::IP_HAS_AD_SIZE
@ IP_HAS_AD_SIZE
Definition: X86BaseInfo.h:59
llvm::X86Disassembler::InternalInstruction::rexPrefix
uint8_t rexPrefix
Definition: X86DisassemblerDecoder.h:543
llvm::support::little
@ little
Definition: Endian.h:27
llvm::X86Disassembler::EA_DISP_16
@ EA_DISP_16
Definition: X86DisassemblerDecoder.h:457
llvm::X86Disassembler::SIB_INDEX_NONE
@ SIB_INDEX_NONE
Definition: X86DisassemblerDecoder.h:434
llvm::X86::BP_DI
@ BP_DI
Definition: X86Disassembler.cpp:1682
MCContext.h
SI
@ SI
Definition: SIInstrInfo.cpp:7993
MCInstrInfo.h
llvm::X86Disassembler::InternalInstruction::hasAdSize
bool hasAdSize
Definition: X86DisassemblerDecoder.h:550
vvvvFromEVEX3of4
#define vvvvFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:42
XOP8_MAP_SYM
#define XOP8_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:30
llvm::X86Disassembler::InternalInstruction::vvvv
Reg vvvv
Definition: X86DisassemblerDecoder.h:589
MCInst.h
readImmediate
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1419
EA_BASES_32BIT
#define EA_BASES_32BIT
Definition: X86DisassemblerDecoder.h:133
MCSubtargetInfo.h
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:112
readOperands
static int readOperands(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1503
llvm::X86Disassembler::VEX_PREFIX_F2
@ VEX_PREFIX_F2
Definition: X86DisassemblerDecoder.h:501
llvm::X86Disassembler::InternalInstruction::startLocation
uint64_t startLocation
Definition: X86DisassemblerDecoder.h:530
ppFromVEX3of3
#define ppFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:58
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::X86Disassembler::XOPA_MAP
@ XOPA_MAP
Definition: X86DisassemblerDecoderCommon.h:296
llvm::X86Disassembler::SIBIndex
SIBIndex
All possible values of the SIB index field.
Definition: X86DisassemblerDecoder.h:433
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
llvm::X86Disassembler::OperandEncoding
OperandEncoding
Definition: X86DisassemblerDecoderCommon.h:404
llvm::X86Disassembler::EA_DISP_NONE
@ EA_DISP_NONE
Definition: X86DisassemblerDecoder.h:455
translateOperand
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
Definition: X86Disassembler.cpp:2236
bFromVEX2of3
#define bFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:53
wFromREX
#define wFromREX(rex)
Definition: X86DisassemblerDecoder.h:31
llvm::X86Disassembler::ATTR_VEX
@ ATTR_VEX
Definition: X86DisassemblerDecoderCommon.h:61
llvm::MCDisassembler::DecodeStatus
DecodeStatus
Ternary decode status.
Definition: MCDisassembler.h:103
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
ONEBYTE_SYM
#define ONEBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:26
llvm::X86Disassembler::InternalInstruction::displacementSize
uint8_t displacementSize
Definition: X86DisassemblerDecoder.h:561
llvm::X86Disassembler::IC_max
@ IC_max
Definition: X86DisassemblerDecoderCommon.h:283
llvm::X86Disassembler::SIBBase
SIBBase
All possible values of the SIB base field.
Definition: X86DisassemblerDecoder.h:445
GENERIC_FIXUP_FUNC
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
Definition: X86Disassembler.cpp:741
readMaskRegister
static int readMaskRegister(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1490
getInstructionID
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
Definition: X86Disassembler.cpp:1081
llvm::X86Disassembler::InternalInstruction::modRM
uint8_t modRM
Definition: X86DisassemblerDecoder.h:597
llvm::X86Disassembler::InternalInstruction::eaRegBase
EABase eaRegBase
Definition: X86DisassemblerDecoder.h:617
isPrefix
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
Definition: X86AsmBackend.cpp:271
llvm::X86Disassembler::TYPE_VEX_2B
@ TYPE_VEX_2B
Definition: X86DisassemblerDecoder.h:506
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
llvm::X86AS::GS
@ GS
Definition: X86.h:200
llvm::pdb::PDB_ColorItem::Address
@ Address
lFromXOP3of3
#define lFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:71
regFromModRM
#define regFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:26
LLVMInitializeX86Disassembler
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
Definition: X86Disassembler.cpp:2339
type
AMD64 Optimization Manual has some nice information about optimizing integer multiplication by a constant How much of it applies to Intel s X86 implementation There are definite trade offs to xmm0 cvttss2siq rdx jb L3 subss xmm0 rax cvttss2siq rdx xorq rdx rax ret instead of xmm1 cvttss2siq rcx movaps xmm2 subss xmm2 cvttss2siq rax rdx xorq rax ucomiss xmm0 cmovb rax ret Seems like the jb branch has high likelihood of being taken It would have saved a few instructions It s not possible to reference and DH registers in an instruction requiring REX prefix divb and mulb both produce results in AH If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a r8b r15b To get around isel emits a CopyFromReg from AX and then right shift it down by and truncate it It s not pretty but it works We need some register allocation magic to make the hack go which would often require a callee saved register Callees usually need to keep this value live for most of their body so it doesn t add a significant burden on them We currently implement this in however this is suboptimal because it means that it would be quite awkward to implement the optimization for callers A better implementation would be to relax the LLVM IR rules for sret arguments to allow a function with an sret argument to have a non void return type
Definition: README-X86-64.txt:70
X86MCTargetDesc.h
llvm::X86Disassembler::ATTR_OPSIZE
@ ATTR_OPSIZE
Definition: X86DisassemblerDecoderCommon.h:59
llvm::X86Disassembler::InternalInstruction::RC
uint8_t RC
Definition: X86DisassemblerDecoder.h:634
llvm::X86Disassembler::VEX_PREFIX_F3
@ VEX_PREFIX_F3
Definition: X86DisassemblerDecoder.h:500
llvm::X86Disassembler::InternalInstruction::bytes
llvm::ArrayRef< uint8_t > bytes
Definition: X86DisassemblerDecoder.h:521
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
index
splat index
Definition: README_ALTIVEC.txt:181
uint64_t
llvm::X86Disassembler::ATTR_EVEXB
@ ATTR_EVEXB
Definition: X86DisassemblerDecoderCommon.h:67
llvm::X86Disassembler::VEX_LOB_MAP6
@ VEX_LOB_MAP6
Definition: X86DisassemblerDecoder.h:487
llvm::X86Disassembler::ATTR_VEXL
@ ATTR_VEXL
Definition: X86DisassemblerDecoderCommon.h:62
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
llvm::X86Disassembler::XOP8_MAP
@ XOP8_MAP
Definition: X86DisassemblerDecoderCommon.h:294
llvm::MCDisassembler::tryAddingPcLoadReferenceComment
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
Definition: MCDisassembler.cpp:39
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::MCDisassembler
Superclass for all disassemblers.
Definition: MCDisassembler.h:79
ContextDecision
Definition: X86Disassembler.cpp:117
llvm::X86Disassembler::InternalInstruction::writemask
Reg writemask
Definition: X86DisassemblerDecoder.h:592
vvvvFromXOP3of3
#define vvvvFromXOP3of3(vex)
Definition: X86DisassemblerDecoder.h:70
decode
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
Definition: X86Disassembler.cpp:123
llvm::X86Disassembler::XOP_MAP_SELECT_9
@ XOP_MAP_SELECT_9
Definition: X86DisassemblerDecoder.h:492
llvm::X86Disassembler::InternalInstruction::immediates
uint64_t immediates[2]
Definition: X86DisassemblerDecoder.h:608
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::X86Disassembler::InternalInstruction::eaBase
EABase eaBase
Definition: X86DisassemblerDecoder.h:622
mmmmmFromXOP2of3
#define mmmmmFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:68
llvm::X86Disassembler::EABase
EABase
All possible values of the base field for effective-address computations, a.k.a.
Definition: X86DisassemblerDecoder.h:418
llvm::MCOI::OperandType
OperandType
Operands are tagged with one of the values of this enum.
Definition: MCInstrDesc.h:58
llvm::X86Disassembler::InternalInstruction::reg
Reg reg
Definition: X86DisassemblerDecoder.h:625
llvm::X86Disassembler::InternalInstruction::sib
uint8_t sib
Definition: X86DisassemblerDecoder.h:600
llvm::X86::BX_SI
@ BX_SI
Definition: X86Disassembler.cpp:1679
llvm::X86Disassembler::InternalInstruction::immediateOffset
uint8_t immediateOffset
Definition: X86DisassemblerDecoder.h:567
llvm::X86Disassembler::ONEBYTE
@ ONEBYTE
Definition: X86DisassemblerDecoderCommon.h:290
llvm::X86::IP_NO_PREFIX
@ IP_NO_PREFIX
Definition: X86BaseInfo.h:57
ppFromEVEX3of4
#define ppFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:43
rmFromModRM
#define rmFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:27
llvm::X86Disassembler::InternalInstruction::spec
const InstructionSpecifier * spec
Definition: X86DisassemblerDecoder.h:581
llvm::X86Disassembler::ATTR_ADSIZE
@ ATTR_ADSIZE
Definition: X86DisassemblerDecoderCommon.h:60
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
translateInstruction
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
Definition: X86Disassembler.cpp:2299
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1862
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::X86Disassembler::InternalInstruction::instructionID
uint16_t instructionID
Definition: X86DisassemblerDecoder.h:579
llvm::X86Disassembler::ATTR_EVEXKZ
@ ATTR_EVEXKZ
Definition: X86DisassemblerDecoderCommon.h:66
isBranch
static bool isBranch(unsigned Opcode)
Definition: R600InstrInfo.cpp:642
llvm::X86Disassembler::OperandSpecifier::encoding
uint8_t encoding
Definition: X86DisassemblerDecoderCommon.h:458
llvm::MCInstrInfo::getName
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
llvm::X86Disassembler::InternalInstruction::addressSize
uint8_t addressSize
Definition: X86DisassemblerDecoder.h:560
llvm::X86Disassembler::InternalInstruction::numImmediatesConsumed
uint8_t numImmediatesConsumed
Definition: X86DisassemblerDecoder.h:606
llvm::X86AS::SS
@ SS
Definition: X86.h:202
zFromEVEX4of4
#define zFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:44
llvm::X86Disassembler::InternalInstruction::operands
ArrayRef< OperandSpecifier > operands
Definition: X86DisassemblerDecoder.h:636
llvm::X86Disassembler::XOP9_MAP
@ XOP9_MAP
Definition: X86DisassemblerDecoderCommon.h:295
llvm::X86Disassembler::InternalInstruction::displacementOffset
uint8_t displacementOffset
Definition: X86DisassemblerDecoder.h:566
llvm::X86Disassembler::OpcodeType
OpcodeType
Definition: X86DisassemblerDecoderCommon.h:289
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1716
llvm::ArrayRef< uint8_t >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bFromEVEX2of4
#define bFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:38
llvm::X86Disassembler::InternalInstruction::regBase
Reg regBase
Definition: X86DisassemblerDecoder.h:618
readVVVV
static int readVVVV(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1463
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::X86Disassembler::MAP6
@ MAP6
Definition: X86DisassemblerDecoderCommon.h:299
uint32_t
translateDstIndex
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
Definition: X86Disassembler.cpp:1839
llvm::X86Disassembler::OperandSpecifier
The specification for how to extract and interpret one operand.
Definition: X86DisassemblerDecoderCommon.h:457
readOpcodeRegister
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1376
llvm::X86Disassembler::InternalInstruction::repeatPrefix
uint8_t repeatPrefix
Definition: X86DisassemblerDecoder.h:556
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
translateRM
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
Definition: X86Disassembler.cpp:2170
llvm::X86Disassembler::InternalInstruction::eaDisplacement
EADisplacement eaDisplacement
Definition: X86DisassemblerDecoder.h:623
rFromREX
#define rFromREX(rex)
Definition: X86DisassemblerDecoder.h:32
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::X86Disassembler::VEX_PREFIX_66
@ VEX_PREFIX_66
Definition: X86DisassemblerDecoder.h:499
llvm::X86::sib
@ sib
Definition: X86Disassembler.cpp:1683
ContextDecision::opcodeDecisions
OpcodeDecision opcodeDecisions[IC_max]
Definition: X86Disassembler.cpp:118
readOpcode
static bool readOpcode(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:871
llvm::X86Disassembler::InternalInstruction::readerCursor
uint64_t readerCursor
Definition: X86DisassemblerDecoder.h:523
TWOBYTE_SYM
#define TWOBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:27
llvm::X86Disassembler::InternalInstruction::mode
DisassemblerMode mode
Definition: X86DisassemblerDecoder.h:528
llvm::X86Disassembler
Definition: X86DisassemblerDecoderCommon.h:22
v2FromEVEX4of4
#define v2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:48
aaaFromEVEX4of4
#define aaaFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:49
REGS_ZMM
#define REGS_ZMM
Definition: X86DisassemblerDecoder.h:283
readDisplacement
static int readDisplacement(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:572
ALL_REGS
#define ALL_REGS
Definition: X86DisassemblerDecoder.h:397
llvm::X86Disassembler::InternalInstruction::hasOpSize
bool hasOpSize
Definition: X86DisassemblerDecoder.h:552
llvm::X86Disassembler::ATTR_XD
@ ATTR_XD
Definition: X86DisassemblerDecoderCommon.h:57
llvm::MCInstrInfo
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:129
llvm::X86Disassembler::SEG_OVERRIDE_max
@ SEG_OVERRIDE_max
Definition: X86DisassemblerDecoder.h:478
REGS_YMM
#define REGS_YMM
Definition: X86DisassemblerDecoder.h:249
std
Definition: BitVector.h:851
XOPA_MAP_SYM
#define XOPA_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:32
translateRegister
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
Definition: X86Disassembler.cpp:1791
ALL_EA_BASES
#define ALL_EA_BASES
Definition: X86DisassemblerDecoder.h:388
uint16_t
translateRMMemory
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
Definition: X86Disassembler.cpp:2005
THREEBYTE38_SYM
#define THREEBYTE38_SYM
Definition: X86DisassemblerDecoderCommon.h:28
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:354
XOP9_MAP_SYM
#define XOP9_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:31
translateMaskRegister
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
Definition: X86Disassembler.cpp:2218
Success
#define Success
Definition: AArch64Disassembler.cpp:300
llvm::X86Disassembler::OperandSpecifier::type
uint8_t type
Definition: X86DisassemblerDecoderCommon.h:459
lFromEVEX4of4
#define lFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:46
llvm::X86::IP_HAS_REPEAT_NE
@ IP_HAS_REPEAT_NE
Definition: X86BaseInfo.h:60
llvm::MCInst::getOpcode
unsigned getOpcode() const
Definition: MCInst.h:198
llvm::X86Disassembler::TWOBYTE
@ TWOBYTE
Definition: X86DisassemblerDecoderCommon.h:291
llvm::X86Disassembler::TYPE_VEX_3B
@ TYPE_VEX_3B
Definition: X86DisassemblerDecoder.h:507
wFromXOP3of3
#define wFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:69
REGS_XMM
#define REGS_XMM
Definition: X86DisassemblerDecoder.h:215
THREEBYTE3A_SYM
#define THREEBYTE3A_SYM
Definition: X86DisassemblerDecoderCommon.h:29
llvm::X86Disassembler::EA_DISP_8
@ EA_DISP_8
Definition: X86DisassemblerDecoder.h:456
instr
@ instr
Definition: HWAddressSanitizer.cpp:194
X86BaseInfo.h
llvm::X86Disassembler::DisassemblerMode
DisassemblerMode
Decoding mode for the Intel disassembler.
Definition: X86DisassemblerDecoderCommon.h:467
llvm::X86Disassembler::InternalInstruction::mandatoryPrefix
uint8_t mandatoryPrefix
Definition: X86DisassemblerDecoder.h:537
l2FromEVEX4of4
#define l2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:45
llvm::X86Disassembler::InternalInstruction::sibIndexBase
SIBIndex sibIndexBase
Definition: X86DisassemblerDecoder.h:628
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
INSTRUCTIONS_SYM
#define INSTRUCTIONS_SYM
Definition: X86DisassemblerDecoderCommon.h:24
llvm::X86Disassembler::InternalInstruction::sibScale
uint8_t sibScale
Definition: X86DisassemblerDecoder.h:630
is16BitEquivalent
static bool is16BitEquivalent(const char *orig, const char *equiv)
Definition: X86Disassembler.cpp:996
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
bFromEVEX4of4
#define bFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:47
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
CASE_ENCODING_VSIB
#define CASE_ENCODING_VSIB
Definition: X86DisassemblerDecoderCommon.h:350
llvm::omp::RTLDependInfoFields::Flags
@ Flags
xFromEVEX2of4
#define xFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:37
llvm::X86Disassembler::OperandType
OperandType
Definition: X86DisassemblerDecoderCommon.h:450
llvm::X86Disassembler::VEX_LOB_MAP5
@ VEX_LOB_MAP5
Definition: X86DisassemblerDecoder.h:486
xFromVEX2of3
#define xFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:52
llvm::X86Disassembler::THREEDNOW_MAP
@ THREEDNOW_MAP
Definition: X86DisassemblerDecoderCommon.h:297
llvm::X86Disassembler::InstrUID
uint16_t InstrUID
Definition: X86DisassemblerDecoderCommon.h:309
llvm::X86Disassembler::SEG_OVERRIDE_DS
@ SEG_OVERRIDE_DS
Definition: X86DisassemblerDecoder.h:474
llvm::X86Disassembler::InstructionSpecifier
The specification for how to extract and interpret a full instruction and its operands.
Definition: X86DisassemblerDecoder.h:514
readPrefixes
static int readPrefixes(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:211
raw_ostream.h
llvm::X86Disassembler::InternalInstruction::registerSize
uint8_t registerSize
Definition: X86DisassemblerDecoder.h:559
indexFromSIB
#define indexFromSIB(sib)
Definition: X86DisassemblerDecoder.h:29
segmentRegnums
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Definition: X86Disassembler.cpp:1800
llvm::X86Disassembler::ATTR_64BIT
@ ATTR_64BIT
Definition: X86DisassemblerDecoderCommon.h:55
X86
Unrolling by would eliminate the &in both leading to a net reduction in code size The resultant code would then also be suitable for exit value computation We miss a bunch of rotate opportunities on various including etc On X86
Definition: README.txt:568
X86TargetInfo.h
TargetRegistry.h
ModRMDecision::modrm_type
uint8_t modrm_type
Definition: X86Disassembler.cpp:102
MCExpr.h
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76
llvm::X86Disassembler::THREEBYTE_3A
@ THREEBYTE_3A
Definition: X86DisassemblerDecoderCommon.h:293
llvm::X86Disassembler::ATTR_NONE
@ ATTR_NONE
Definition: X86DisassemblerDecoderCommon.h:54
Debug.h
llvm::MCInst::clear
void clear()
Definition: MCInst.h:215
llvm::X86Disassembler::InternalInstruction::immediateSize
uint8_t immediateSize
Definition: X86DisassemblerDecoder.h:562
llvm::X86Disassembler::InternalInstruction::vectorExtensionPrefix
uint8_t vectorExtensionPrefix[4]
Definition: X86DisassemblerDecoder.h:539
llvm::X86Disassembler::InternalInstruction::sibBase
SIBBase sibBase
Definition: X86DisassemblerDecoder.h:631