LLVM  13.0.0git
X86Disassembler.cpp
Go to the documentation of this file.
1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32 // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52 // the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75 
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
86 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/Format.h"
90 
91 using namespace llvm;
92 using namespace llvm::X86Disassembler;
93 
94 #define DEBUG_TYPE "x86-disassembler"
95 
// Debug-output helper: inside LLVM_DEBUG, streams the current __LINE__, the
// separator ": ", and then s to dbgs(). Note that the expansion itself already
// ends with a semicolon.
96 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97 
98 // Specifies whether a ModR/M byte is needed and (if so) which
99 // instruction each possible value of the ModR/M byte corresponds to. Once
100 // this information is known, we have narrowed down to a single instruction.
102  uint8_t modrm_type;
104 };
105 
106 // Specifies which set of ModR/M->instruction tables to look at
107 // given a particular opcode.
109  ModRMDecision modRMDecisions[256];
110 };
111 
112 // Specifies which opcode->instruction tables to look at given
113 // a particular context (set of attributes). Since there are many possible
114 // contexts, the decoder first uses CONTEXTS_SYM to determine which context
115 // applies given a specific set of attributes. Hence there are only IC_max
116 // entries in this table, rather than 2^(ATTR_max).
118  OpcodeDecision opcodeDecisions[IC_max];
119 };
120 
121 #include "X86GenDisassemblerTables.inc"
122 
124  uint8_t opcode, uint8_t modRM) {
 // NOTE(review): the line that opens this definition (listing line 123) is
 // missing from this copy, so the return type, the function name and the
 // declarations of `type` and `insnContext` (both used below) are not visible
 // here.
 //
 // Looks up the ModRMDecision for (type, insnContext, opcode) and then uses
 // its modrm_type, together with the ModR/M byte, to pick one entry of
 // modRMTable.
125  const struct ModRMDecision *dec;
126 
 // Select the decision table that belongs to the opcode map named by `type`.
127  switch (type) {
128  case ONEBYTE:
129  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130  break;
131  case TWOBYTE:
132  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133  break;
134  case THREEBYTE_38:
135  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136  break;
137  case THREEBYTE_3A:
138  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139  break;
140  case XOP8_MAP:
141  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142  break;
143  case XOP9_MAP:
144  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145  break;
146  case XOPA_MAP:
147  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148  break;
149  case THREEDNOW_MAP:
150  dec =
151  &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152  break;
153  }
154 
 // dec->instructionIDs is used as the index of this opcode's first entry in
 // modRMTable; modrm_type decides how the ModR/M byte offsets into the
 // entries that follow it.
155  switch (dec->modrm_type) {
156  default:
157  llvm_unreachable("Corrupt table! Unknown modrm_type");
158  return 0;
159  case MODRM_ONEENTRY:
 // A single entry; the ModR/M byte is not consulted.
160  return modRMTable[dec->instructionIDs];
161  case MODRM_SPLITRM:
 // Two entries: the second when mod == 0b11, the first otherwise.
162  if (modFromModRM(modRM) == 0x3)
163  return modRMTable[dec->instructionIDs + 1];
164  return modRMTable[dec->instructionIDs];
165  case MODRM_SPLITREG:
 // Indexed by bits 5:3 of the ModR/M byte; the mod == 0b11 entries start
 // 8 slots later.
166  if (modFromModRM(modRM) == 0x3)
167  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
168  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
169  case MODRM_SPLITMISC:
 // mod == 0b11: indexed by the low six bits of the ModR/M byte, offset by 8.
 // Otherwise: indexed by bits 5:3 only.
170  if (modFromModRM(modRM) == 0x3)
171  return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
172  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
173  case MODRM_FULL:
 // Indexed by the complete ModR/M byte.
174  return modRMTable[dec->instructionIDs + modRM];
175  }
176 }
177 
178 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
179  uint64_t offset = insn->readerCursor - insn->startLocation;
180  if (offset >= insn->bytes.size())
181  return true;
182  byte = insn->bytes[offset];
183  return false;
184 }
185 
186 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
187  auto r = insn->bytes;
188  uint64_t offset = insn->readerCursor - insn->startLocation;
189  if (offset + sizeof(T) > r.size())
190  return true;
191  T ret = 0;
192  for (unsigned i = 0; i < sizeof(T); ++i)
193  ret |= (uint64_t)r[offset + i] << (i * 8);
194  ptr = ret;
195  insn->readerCursor += sizeof(T);
196  return false;
197 }
198 
199 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
200  return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
201 }
202 
203 // Consumes all of an instruction's prefix bytes, and marks the
204 // instruction as having them. Also sets the instruction's default operand,
205 // address, and other relevant data sizes to report operands correctly.
206 //
207 // insn must not be empty.
//
// Returns 0 on success, or -1 when a byte that must follow a REX, EVEX, VEX
// or XOP lead byte cannot be read.
//
// NOTE(review): this copy of the function has gaps (listing lines 281, 284,
// 287, 290, 293, 296, 320, 336, 377, 408 and 440 are absent); each gap is
// marked below.
208 static int readPrefixes(struct InternalInstruction *insn) {
209  bool isPrefix = true;
210  uint8_t byte = 0;
211  uint8_t nextByte;
212 
213  LLVM_DEBUG(dbgs() << "readPrefixes()");
214 
215  while (isPrefix) {
216  // If we fail reading prefixes, just stop here and let the opcode reader
217  // deal with it.
218  if (consume(insn, byte))
219  break;
220 
221  // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
222  // break and let it be disassembled as a normal "instruction".
223  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
224  break;
225 
226  if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
227  // If the byte is 0xf2 or 0xf3, and any of the following conditions are
228  // met:
229  // - it is followed by a LOCK (0xf0) prefix
230  // - it is followed by an xchg instruction
231  // then it should be disassembled as a xacquire/xrelease not repne/rep.
232  if (((nextByte == 0xf0) ||
233  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
234  insn->xAcquireRelease = true;
235  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
236  break;
237  }
238  // Also if the byte is 0xf3, and the following condition is met:
239  // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
240  // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
241  // then it should be disassembled as an xrelease not rep.
242  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
243  nextByte == 0xc6 || nextByte == 0xc7)) {
244  insn->xAcquireRelease = true;
245  break;
246  }
247  if (isREX(insn, nextByte)) {
248  uint8_t nnextByte;
249  // Go to REX prefix after the current one
250  if (consume(insn, nnextByte))
251  return -1;
252  // We should be able to read next byte after REX prefix
253  if (peek(insn, nnextByte))
254  return -1;
 // Only availability was being checked: step back so the REX byte is
 // still unread.
255  --insn->readerCursor;
256  }
257  }
258 
259  switch (byte) {
260  case 0xf0: // LOCK
261  insn->hasLockPrefix = true;
262  break;
263  case 0xf2: // REPNE/REPNZ
264  case 0xf3: { // REP or REPE/REPZ
265  uint8_t nextByte;
266  if (peek(insn, nextByte))
267  break;
268  // TODO:
269  // 1. There could be several 0x66
270  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
271  // it's not mandatory prefix
272  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
273  // 0x0f exactly after it to be mandatory prefix
274  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
275  // The last of 0xf2 /0xf3 is mandatory prefix
276  insn->mandatoryPrefix = byte;
277  insn->repeatPrefix = byte;
278  break;
279  }
280  case 0x2e: // CS segment override -OR- Branch not taken
 // NOTE(review): listing line 281 is missing from this copy; as shown, this
 // case does nothing but break. The same one-line gap follows each of the
 // segment-override cases below (listing lines 284, 287, 290, 293, 296).
282  break;
283  case 0x36: // SS segment override -OR- Branch taken
285  break;
286  case 0x3e: // DS segment override
288  break;
289  case 0x26: // ES segment override
291  break;
292  case 0x64: // FS segment override
294  break;
295  case 0x65: // GS segment override
297  break;
298  case 0x66: { // Operand-size override
299  uint8_t nextByte;
300  insn->hasOpSize = true;
301  if (peek(insn, nextByte))
302  break;
303  // 0x66 can't overwrite existing mandatory prefix and should be ignored
304  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
305  insn->mandatoryPrefix = byte;
306  break;
307  }
308  case 0x67: // Address-size override
309  insn->hasAdSize = true;
310  break;
311  default: // Not a prefix byte
312  isPrefix = false;
313  break;
314  }
315 
316  if (isPrefix)
317  LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
318  }
319 
 // NOTE(review): listing line 320 is missing from this copy.
321 
 // `byte` now holds the last byte consumed by the loop above. Check whether
 // it starts an EVEX (0x62), three-byte VEX (0xc4), two-byte VEX (0xc5) or
 // XOP (0x8f) prefix, or is a REX prefix.
322  if (byte == 0x62) {
323  uint8_t byte1, byte2;
324  if (consume(insn, byte1)) {
325  LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
326  return -1;
327  }
328 
329  if (peek(insn, byte2)) {
330  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
331  return -1;
332  }
333 
334  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
335  ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
 // NOTE(review): listing line 336 (the body of this branch) is missing from
 // this copy; presumably it records the extension type that is tested a few
 // lines below - confirm against upstream.
337  } else {
338  --insn->readerCursor; // unconsume byte1
339  --insn->readerCursor; // unconsume byte
340  }
341 
342  if (insn->vectorExtensionType == TYPE_EVEX) {
343  insn->vectorExtensionPrefix[0] = byte;
344  insn->vectorExtensionPrefix[1] = byte1;
345  if (consume(insn, insn->vectorExtensionPrefix[2])) {
346  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
347  return -1;
348  }
349  if (consume(insn, insn->vectorExtensionPrefix[3])) {
350  LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
351  return -1;
352  }
353 
354  // We simulate the REX prefix for simplicity's sake
355  if (insn->mode == MODE_64BIT) {
356  insn->rexPrefix = 0x40 |
357  (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
358  (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
359  (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
360  (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
361  }
362 
363  LLVM_DEBUG(
364  dbgs() << format(
365  "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
366  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
367  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
368  }
369  } else if (byte == 0xc4) {
370  uint8_t byte1;
371  if (peek(insn, byte1)) {
372  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
373  return -1;
374  }
375 
 // NOTE(review): listing line 377 (the statement controlled by this `if`) is
 // missing from this copy; presumably it records the three-byte VEX
 // extension type - confirm against upstream.
376  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
378  else
379  --insn->readerCursor;
380 
381  if (insn->vectorExtensionType == TYPE_VEX_3B) {
382  insn->vectorExtensionPrefix[0] = byte;
 // The return values of these consume() calls are not checked.
383  consume(insn, insn->vectorExtensionPrefix[1]);
384  consume(insn, insn->vectorExtensionPrefix[2]);
385 
386  // We simulate the REX prefix for simplicity's sake
387 
388  if (insn->mode == MODE_64BIT)
389  insn->rexPrefix = 0x40 |
390  (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
391  (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
392  (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
393  (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
394 
395  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
396  insn->vectorExtensionPrefix[0],
397  insn->vectorExtensionPrefix[1],
398  insn->vectorExtensionPrefix[2]));
399  }
400  } else if (byte == 0xc5) {
401  uint8_t byte1;
402  if (peek(insn, byte1)) {
403  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
404  return -1;
405  }
406 
 // NOTE(review): listing line 408 (the statement controlled by this `if`) is
 // missing from this copy; presumably it records the two-byte VEX extension
 // type - confirm against upstream.
407  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
409  else
410  --insn->readerCursor;
411 
412  if (insn->vectorExtensionType == TYPE_VEX_2B) {
413  insn->vectorExtensionPrefix[0] = byte;
414  consume(insn, insn->vectorExtensionPrefix[1]);
415 
416  if (insn->mode == MODE_64BIT)
417  insn->rexPrefix =
418  0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
419 
 // A pp field equal to VEX_PREFIX_66 is treated like an operand-size prefix.
420  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
421  default:
422  break;
423  case VEX_PREFIX_66:
424  insn->hasOpSize = true;
425  break;
426  }
427 
428  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
429  insn->vectorExtensionPrefix[0],
430  insn->vectorExtensionPrefix[1]));
431  }
432  } else if (byte == 0x8f) {
433  uint8_t byte1;
434  if (peek(insn, byte1)) {
435  LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
436  return -1;
437  }
438 
 // NOTE(review): listing line 440 (the statement controlled by this `if`) is
 // missing from this copy; presumably it records the XOP extension type -
 // confirm against upstream.
439  if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
441  else
442  --insn->readerCursor;
443 
444  if (insn->vectorExtensionType == TYPE_XOP) {
445  insn->vectorExtensionPrefix[0] = byte;
446  consume(insn, insn->vectorExtensionPrefix[1]);
447  consume(insn, insn->vectorExtensionPrefix[2]);
448 
449  // We simulate the REX prefix for simplicity's sake
450 
451  if (insn->mode == MODE_64BIT)
452  insn->rexPrefix = 0x40 |
453  (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
454  (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
455  (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
456  (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
457 
458  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
459  default:
460  break;
461  case VEX_PREFIX_66:
462  insn->hasOpSize = true;
463  break;
464  }
465 
466  LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
467  insn->vectorExtensionPrefix[0],
468  insn->vectorExtensionPrefix[1],
469  insn->vectorExtensionPrefix[2]));
470  }
471  } else if (isREX(insn, byte)) {
 // A REX prefix must be followed by at least one more byte.
472  if (peek(insn, nextByte))
473  return -1;
474  insn->rexPrefix = byte;
475  LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
476  } else
 // None of the lead bytes above: step the cursor back by one.
477  --insn->readerCursor;
478 
 // Derive the register, address, displacement and immediate sizes (in bytes)
 // from the decoding mode and the size-override flags gathered above.
479  if (insn->mode == MODE_16BIT) {
480  insn->registerSize = (insn->hasOpSize ? 4 : 2);
481  insn->addressSize = (insn->hasAdSize ? 4 : 2);
482  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
483  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
484  } else if (insn->mode == MODE_32BIT) {
485  insn->registerSize = (insn->hasOpSize ? 2 : 4);
486  insn->addressSize = (insn->hasAdSize ? 2 : 4);
487  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
488  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
489  } else if (insn->mode == MODE_64BIT) {
490  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
 // W bit set: 8-byte registers, and the operand-size flag is cleared.
491  insn->registerSize = 8;
492  insn->addressSize = (insn->hasAdSize ? 4 : 8);
493  insn->displacementSize = 4;
494  insn->immediateSize = 4;
495  insn->hasOpSize = false;
496  } else {
497  insn->registerSize = (insn->hasOpSize ? 2 : 4);
498  insn->addressSize = (insn->hasAdSize ? 4 : 8);
 // NOTE(review): displacementSize is keyed on hasOpSize here, whereas the
 // 16- and 32-bit branches key it on hasAdSize - confirm this is intended.
499  insn->displacementSize = (insn->hasOpSize ? 2 : 4);
500  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
501  }
502  }
503 
504  return 0;
505 }
506 
507 // Consumes the SIB byte to determine addressing information.
508 static int readSIB(struct InternalInstruction *insn) {
509  SIBBase sibBaseBase = SIB_BASE_NONE;
510  uint8_t index, base;
511 
512  LLVM_DEBUG(dbgs() << "readSIB()");
513  switch (insn->addressSize) {
514  case 2:
515  default:
516  llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
517  case 4:
518  insn->sibIndexBase = SIB_INDEX_EAX;
519  sibBaseBase = SIB_BASE_EAX;
520  break;
521  case 8:
522  insn->sibIndexBase = SIB_INDEX_RAX;
523  sibBaseBase = SIB_BASE_RAX;
524  break;
525  }
526 
527  if (consume(insn, insn->sib))
528  return -1;
529 
530  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
531 
532  if (index == 0x4) {
533  insn->sibIndex = SIB_INDEX_NONE;
534  } else {
535  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
536  }
537 
538  insn->sibScale = 1 << scaleFromSIB(insn->sib);
539 
540  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
541 
542  switch (base) {
543  case 0x5:
544  case 0xd:
545  switch (modFromModRM(insn->modRM)) {
546  case 0x0:
547  insn->eaDisplacement = EA_DISP_32;
548  insn->sibBase = SIB_BASE_NONE;
549  break;
550  case 0x1:
551  insn->eaDisplacement = EA_DISP_8;
552  insn->sibBase = (SIBBase)(sibBaseBase + base);
553  break;
554  case 0x2:
555  insn->eaDisplacement = EA_DISP_32;
556  insn->sibBase = (SIBBase)(sibBaseBase + base);
557  break;
558  default:
559  llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
560  }
561  break;
562  default:
563  insn->sibBase = (SIBBase)(sibBaseBase + base);
564  break;
565  }
566 
567  return 0;
568 }
569 
570 static int readDisplacement(struct InternalInstruction *insn) {
571  int8_t d8;
572  int16_t d16;
573  int32_t d32;
574  LLVM_DEBUG(dbgs() << "readDisplacement()");
575 
576  insn->displacementOffset = insn->readerCursor - insn->startLocation;
577  switch (insn->eaDisplacement) {
578  case EA_DISP_NONE:
579  break;
580  case EA_DISP_8:
581  if (consume(insn, d8))
582  return -1;
583  insn->displacement = d8;
584  break;
585  case EA_DISP_16:
586  if (consume(insn, d16))
587  return -1;
588  insn->displacement = d16;
589  break;
590  case EA_DISP_32:
591  if (consume(insn, d32))
592  return -1;
593  insn->displacement = d32;
594  break;
595  }
596 
597  return 0;
598 }
599 
600 // Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
//
// Does nothing if the ModR/M byte has already been consumed for this
// instruction. Returns 0 on success, or -1 if the ModR/M byte, the SIB byte
// or the displacement cannot be read.
//
// NOTE(review): this copy of the function has gaps (listing lines 658, 711
// and 728 are absent); each gap is marked below.
601 static int readModRM(struct InternalInstruction *insn) {
602  uint8_t mod, rm, reg, evexrm;
603  LLVM_DEBUG(dbgs() << "readModRM()");
604 
605  if (insn->consumedModRM)
606  return 0;
607 
608  if (consume(insn, insn->modRM))
609  return -1;
610  insn->consumedModRM = true;
611 
612  mod = modFromModRM(insn->modRM);
613  rm = rmFromModRM(insn->modRM);
614  reg = regFromModRM(insn->modRM);
615 
616  // This goes by insn->registerSize to pick the correct register, which messes
617  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
618  // fixupReg().
619  switch (insn->registerSize) {
620  case 2:
621  insn->regBase = MODRM_REG_AX;
622  insn->eaRegBase = EA_REG_AX;
623  break;
624  case 4:
625  insn->regBase = MODRM_REG_EAX;
626  insn->eaRegBase = EA_REG_EAX;
627  break;
628  case 8:
629  insn->regBase = MODRM_REG_RAX;
630  insn->eaRegBase = EA_REG_RAX;
631  break;
632  }
633 
 // Extend the 3-bit reg and rm fields with bit 3 taken from the REX prefix.
634  reg |= rFromREX(insn->rexPrefix) << 3;
635  rm |= bFromREX(insn->rexPrefix) << 3;
636 
 // For EVEX in 64-bit mode, bit 4 of reg comes from the second EVEX byte;
 // evexrm holds the matching extra bit that is applied to rm only in the
 // mod == 0b11 case of the 32/64-bit address-size branch below.
637  evexrm = 0;
638  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
639  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
640  evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
641  }
642 
643  insn->reg = (Reg)(insn->regBase + reg);
644 
645  switch (insn->addressSize) {
646  case 2: {
647  EABase eaBaseBase = EA_BASE_BX_SI;
648 
649  switch (mod) {
650  case 0x0:
651  if (rm == 0x6) {
 // mod 0 with rm 6: no base register, 16-bit displacement only.
652  insn->eaBase = EA_BASE_NONE;
653  insn->eaDisplacement = EA_DISP_16;
654  if (readDisplacement(insn))
655  return -1;
656  } else {
657  insn->eaBase = (EABase)(eaBaseBase + rm);
 // NOTE(review): listing line 658 is missing from this copy; presumably it
 // sets insn->eaDisplacement for the no-displacement form - confirm.
659  }
660  break;
661  case 0x1:
662  insn->eaBase = (EABase)(eaBaseBase + rm);
663  insn->eaDisplacement = EA_DISP_8;
664  insn->displacementSize = 1;
665  if (readDisplacement(insn))
666  return -1;
667  break;
668  case 0x2:
669  insn->eaBase = (EABase)(eaBaseBase + rm);
670  insn->eaDisplacement = EA_DISP_16;
671  if (readDisplacement(insn))
672  return -1;
673  break;
674  case 0x3:
 // Register-direct form: rm is offset from the register base instead.
675  insn->eaBase = (EABase)(insn->eaRegBase + rm);
676  if (readDisplacement(insn))
677  return -1;
678  break;
679  }
680  break;
681  }
682  case 4:
683  case 8: {
684  EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
685 
686  switch (mod) {
687  case 0x0:
688  insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
689  // In determining whether RIP-relative mode is used (rm=5),
690  // or whether a SIB byte is present (rm=4),
691  // the extension bits (REX.b and EVEX.x) are ignored.
692  switch (rm & 7) {
693  case 0x4: // SIB byte is present
694  insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
695  if (readSIB(insn) || readDisplacement(insn))
696  return -1;
697  break;
698  case 0x5: // RIP-relative
699  insn->eaBase = EA_BASE_NONE;
700  insn->eaDisplacement = EA_DISP_32;
701  if (readDisplacement(insn))
702  return -1;
703  break;
704  default:
705  insn->eaBase = (EABase)(eaBaseBase + rm);
706  break;
707  }
708  break;
709  case 0x1:
710  insn->displacementSize = 1;
 // NOTE(review): listing line 711 is missing from this copy; as shown,
 // case 0x1 falls through into case 0x2 (which handles mod == 0x1 itself),
 // so the missing line is presumably a fall-through annotation - confirm.
712  case 0x2:
713  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
714  switch (rm & 7) {
715  case 0x4: // SIB byte is present
 // NOTE(review): always EA_BASE_sib here, whereas the mod == 0 case above
 // chooses between EA_BASE_sib and EA_BASE_sib64 by address size - confirm
 // this is intended.
716  insn->eaBase = EA_BASE_sib;
717  if (readSIB(insn) || readDisplacement(insn))
718  return -1;
719  break;
720  default:
721  insn->eaBase = (EABase)(eaBaseBase + rm);
722  if (readDisplacement(insn))
723  return -1;
724  break;
725  }
726  break;
727  case 0x3:
 // NOTE(review): listing line 728 is missing from this copy; presumably it
 // sets insn->eaDisplacement for the register-direct form - confirm.
729  insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
730  break;
731  }
732  break;
733  }
734  } // switch (insn->addressSize)
735 
736  return 0;
737 }
738 
// Generates a static function `name` that converts a raw register field
// value into the register enumerator for a given operand type.
//
//   name   - identifier of the generated function.
//   base   - expression the index is added to for TYPE_Rv operands.
//   prefix - enumerator prefix; pasted onto suffixes such as _AL or _XMM0.
//   mask   - bit mask applied to the index for TYPE_R8/R16/R32/R64.
//
// The generated function sets *valid to 1 on entry and clears it to 0 when
// the index is out of range for the register class, or when the operand type
// is not handled (in which case 0 is returned).
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
  static uint16_t name(struct InternalInstruction *insn, OperandType type, \
                       uint8_t index, uint8_t *valid) { \
    *valid = 1; \
    switch (type) { \
    case TYPE_Rv: \
      return base + index; \
    case TYPE_R8: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      if (insn->rexPrefix && index >= 4 && index <= 7) \
        return prefix##_SPL + (index - 4); \
      return prefix##_AL + index; \
    case TYPE_R16: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_AX + index; \
    case TYPE_R32: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_EAX + index; \
    case TYPE_R64: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_RAX + index; \
    case TYPE_ZMM: \
    case TYPE_MVSIBZ: \
      return prefix##_ZMM0 + index; \
    case TYPE_YMM: \
    case TYPE_MVSIBY: \
      return prefix##_YMM0 + index; \
    case TYPE_XMM: \
    case TYPE_MVSIBX: \
      return prefix##_XMM0 + index; \
    case TYPE_TMM: \
      if (index > 7) \
        *valid = 0; \
      return prefix##_TMM0 + index; \
    case TYPE_VK: \
      index &= 0xf; \
      if (index > 7) \
        *valid = 0; \
      return prefix##_K0 + index; \
    case TYPE_VK_PAIR: \
      if (index > 7) \
        *valid = 0; \
      return prefix##_K0_K1 + (index / 2); \
    case TYPE_MM64: \
      return prefix##_MM0 + (index & 0x7); \
    case TYPE_SEGMENTREG: \
      if ((index & 7) > 5) \
        *valid = 0; \
      return prefix##_ES + (index & 7); \
    case TYPE_DEBUGREG: \
      return prefix##_DR0 + index; \
    case TYPE_CONTROLREG: \
      return prefix##_CR0 + index; \
    case TYPE_BNDR: \
      if (index > 3) \
        *valid = 0; \
      return prefix##_BND0 + index; \
    default: \
      debug("Unhandled register type"); \
      *valid = 0; \
      return 0; \
    } \
  }
815 
816 // Consult an operand type to determine the meaning of the reg or R/M field. If
817 // the operand is an XMM operand, for example, an operand would be XMM0 instead
818 // of AX, which readModRM() would otherwise misinterpret it as.
819 //
820 // @param insn - The instruction containing the operand.
821 // @param type - The operand type.
822 // @param index - The existing value of the field as reported by readModRM().
823 // @param valid - The address of a uint8_t. The target is set to 1 if the
824 // field is valid for the register class; 0 if not.
825 // @return - The proper value.
826 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
827 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
828 
829 // Consult an operand specifier to determine which of the fixup*Value functions
830 // to use in correcting readModRM()'s interpretation.
831 //
832 // @param insn - See fixup*Value().
833 // @param op - The operand specifier.
834 // @return - 0 if fixup was successful; -1 if the register returned was
835 // invalid for its class.
// -1 is also returned when op->encoding is not one of the encodings handled
// by the switch below.
//
// NOTE(review): listing line 858 is absent from this copy; the gap is marked
// below.
836 static int fixupReg(struct InternalInstruction *insn,
837  const struct OperandSpecifier *op) {
838  uint8_t valid;
839  LLVM_DEBUG(dbgs() << "fixupReg()");
840 
841  switch ((OperandEncoding)op->encoding) {
842  default:
843  debug("Expected a REG or R/M encoding in fixupReg");
844  return -1;
845  case ENCODING_VVVV:
 // insn->vvvv is passed to fixupRegValue() as-is, without subtracting
 // insn->regBase.
846  insn->vvvv =
847  (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
848  if (!valid)
849  return -1;
850  break;
851  case ENCODING_REG:
 // insn->reg was stored by readModRM() as regBase + field, so subtract
 // regBase to recover the raw field value first.
852  insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
853  insn->reg - insn->regBase, &valid);
854  if (!valid)
855  return -1;
856  break;
857  case ENCODING_SIB:
 // NOTE(review): listing line 858 is missing from this copy; presumably it
 // holds further case labels (such as the R/M encodings named in the debug
 // message above) that share this body - confirm against upstream.
 //
 // Only an eaBase at or above eaRegBase is rewritten; lower values are left
 // untouched.
859  if (insn->eaBase >= insn->eaRegBase) {
860  insn->eaBase = (EABase)fixupRMValue(
861  insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
862  if (!valid)
863  return -1;
864  }
865  break;
866  }
867 
868  return 0;
869 }
870 
871 // Read the opcode (except the ModR/M byte in the case of extended or escape
872 // opcodes).
//
// Sets insn->opcodeType and insn->opcode. Returns false on success and true
// on failure (a byte could not be read, or a vector-extension prefix selects
// a map that is not handled here).
//
// NOTE(review): listing lines 883, 900 and 920 (the tails of three debug
// statements) are absent from this copy; each gap is marked below.
873 static bool readOpcode(struct InternalInstruction *insn) {
874  uint8_t current;
875  LLVM_DEBUG(dbgs() << "readOpcode()");
876 
877  insn->opcodeType = ONEBYTE;
 // With an EVEX, VEX or XOP prefix the opcode map is selected by a field of
 // the prefix, and the opcode is the single byte that follows.
878  if (insn->vectorExtensionType == TYPE_EVEX) {
879  switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
880  default:
 // NOTE(review): listing line 883, which completes the debug statement
 // below, is missing from this copy.
881  LLVM_DEBUG(
882  dbgs() << format("Unhandled mm field for instruction (0x%hhx)",
884  return true;
885  case VEX_LOB_0F:
886  insn->opcodeType = TWOBYTE;
887  return consume(insn, insn->opcode);
888  case VEX_LOB_0F38:
889  insn->opcodeType = THREEBYTE_38;
890  return consume(insn, insn->opcode);
891  case VEX_LOB_0F3A:
892  insn->opcodeType = THREEBYTE_3A;
893  return consume(insn, insn->opcode);
894  }
895  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
896  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
897  default:
 // NOTE(review): listing line 900, which completes the debug statement
 // below, is missing from this copy.
898  LLVM_DEBUG(
899  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
901  return true;
902  case VEX_LOB_0F:
903  insn->opcodeType = TWOBYTE;
904  return consume(insn, insn->opcode);
905  case VEX_LOB_0F38:
906  insn->opcodeType = THREEBYTE_38;
907  return consume(insn, insn->opcode);
908  case VEX_LOB_0F3A:
909  insn->opcodeType = THREEBYTE_3A;
910  return consume(insn, insn->opcode);
911  }
912  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
 // The two-byte VEX form always uses the TWOBYTE map.
913  insn->opcodeType = TWOBYTE;
914  return consume(insn, insn->opcode);
915  } else if (insn->vectorExtensionType == TYPE_XOP) {
916  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
917  default:
 // NOTE(review): listing line 920, which completes the debug statement
 // below, is missing from this copy.
918  LLVM_DEBUG(
919  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
921  return true;
922  case XOP_MAP_SELECT_8:
923  insn->opcodeType = XOP8_MAP;
924  return consume(insn, insn->opcode);
925  case XOP_MAP_SELECT_9:
926  insn->opcodeType = XOP9_MAP;
927  return consume(insn, insn->opcode);
928  case XOP_MAP_SELECT_A:
929  insn->opcodeType = XOPA_MAP;
930  return consume(insn, insn->opcode);
931  }
932  }
933 
 // No vector-extension prefix: walk the legacy escape bytes (0x0f, then
 // optionally 0x38, 0x3a or a second 0x0f).
934  if (consume(insn, current))
935  return true;
936 
937  if (current == 0x0f) {
938  LLVM_DEBUG(
939  dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
940  if (consume(insn, current))
941  return true;
942 
943  if (current == 0x38) {
944  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
945  current));
946  if (consume(insn, current))
947  return true;
948 
949  insn->opcodeType = THREEBYTE_38;
950  } else if (current == 0x3a) {
951  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
952  current));
953  if (consume(insn, current))
954  return true;
955 
956  insn->opcodeType = THREEBYTE_3A;
957  } else if (current == 0x0f) {
958  LLVM_DEBUG(
959  dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
960 
961  // Consume operands before the opcode to comply with the 3DNow encoding
962  if (readModRM(insn))
963  return true;
964 
965  if (consume(insn, current))
966  return true;
967 
968  insn->opcodeType = THREEDNOW_MAP;
969  } else {
970  LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
971  insn->opcodeType = TWOBYTE;
972  }
973  } else if (insn->mandatoryPrefix)
974  // The opcode with mandatory prefix must start with opcode escape.
975  // If not it's legacy repeat prefix
976  insn->mandatoryPrefix = 0;
977 
978  // At this point we have consumed the full opcode.
979  // Anything we consume from here on must be unconsumed.
980  insn->opcode = current;
981 
982  return false;
983 }
984 
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length. At each position the characters
// must either match, or differ in one of the accepted ways:
//   'Q' or 'L' in orig against 'W' in equiv,
//   '6' or '3' in orig against '1' in equiv,
//   '4' or '2' in orig against '6' in equiv.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *lhs = orig;
  const char *rhs = equiv;

  while (*lhs != '\0' && *rhs != '\0') {
    const char wide = *lhs++;
    const char narrow = *rhs++;
    if (wide == narrow)
      continue;

    const bool suffixOk = narrow == 'W' && (wide == 'Q' || wide == 'L');
    const bool firstDigitOk = narrow == '1' && (wide == '6' || wide == '3');
    const bool secondDigitOk = narrow == '6' && (wide == '4' || wide == '2');
    if (!(suffixOk || firstDigitOk || secondDigitOk))
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *lhs == '\0' && *rhs == '\0';
}
1003 
// Report whether an instruction name marks a 64-bit instruction, i.e. whether
// the characters "64" appear anywhere in `name`.
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
    // cursor[1] is at worst the terminating NUL, so this never reads past it.
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  }
  return false;
}
1013 
1014 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1015 // for extended and escape opcodes, and using a supplied attribute mask.
1016 static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1017  struct InternalInstruction *insn,
1018  uint16_t attrMask) {
1019  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1020  const ContextDecision *decision;
1021  switch (insn->opcodeType) {
1022  case ONEBYTE:
1023  decision = &ONEBYTE_SYM;
1024  break;
1025  case TWOBYTE:
1026  decision = &TWOBYTE_SYM;
1027  break;
1028  case THREEBYTE_38:
1029  decision = &THREEBYTE38_SYM;
1030  break;
1031  case THREEBYTE_3A:
1032  decision = &THREEBYTE3A_SYM;
1033  break;
1034  case XOP8_MAP:
1035  decision = &XOP8_MAP_SYM;
1036  break;
1037  case XOP9_MAP:
1038  decision = &XOP9_MAP_SYM;
1039  break;
1040  case XOPA_MAP:
1041  decision = &XOPA_MAP_SYM;
1042  break;
1043  case THREEDNOW_MAP:
1044  decision = &THREEDNOW_MAP_SYM;
1045  break;
1046  }
1047 
1048  if (decision->opcodeDecisions[insnCtx]
1049  .modRMDecisions[insn->opcode]
1050  .modrm_type != MODRM_ONEENTRY) {
1051  if (readModRM(insn))
1052  return -1;
1053  *instructionID =
1054  decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1055  } else {
1056  *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1057  }
1058 
1059  return 0;
1060 }
1061 
1062 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1063 // for extended and escape opcodes. Determines the attributes and context for
1064 // the instruction before doing so.
1065 static int getInstructionID(struct InternalInstruction *insn,
1066  const MCInstrInfo *mii) {
1067  uint16_t attrMask;
1068  uint16_t instructionID;
1069 
1070  LLVM_DEBUG(dbgs() << "getID()");
1071 
1072  attrMask = ATTR_NONE;
1073 
1074  if (insn->mode == MODE_64BIT)
1075  attrMask |= ATTR_64BIT;
1076 
1077  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1078  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1079 
1080  if (insn->vectorExtensionType == TYPE_EVEX) {
1081  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1082  case VEX_PREFIX_66:
1083  attrMask |= ATTR_OPSIZE;
1084  break;
1085  case VEX_PREFIX_F3:
1086  attrMask |= ATTR_XS;
1087  break;
1088  case VEX_PREFIX_F2:
1089  attrMask |= ATTR_XD;
1090  break;
1091  }
1092 
1093  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1094  attrMask |= ATTR_EVEXKZ;
1095  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1096  attrMask |= ATTR_EVEXB;
1097  if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1098  attrMask |= ATTR_EVEXK;
1099  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1100  attrMask |= ATTR_VEXL;
1101  if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1102  attrMask |= ATTR_EVEXL2;
1103  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1104  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1105  case VEX_PREFIX_66:
1106  attrMask |= ATTR_OPSIZE;
1107  break;
1108  case VEX_PREFIX_F3:
1109  attrMask |= ATTR_XS;
1110  break;
1111  case VEX_PREFIX_F2:
1112  attrMask |= ATTR_XD;
1113  break;
1114  }
1115 
1116  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1117  attrMask |= ATTR_VEXL;
1118  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1119  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1120  case VEX_PREFIX_66:
1121  attrMask |= ATTR_OPSIZE;
1122  break;
1123  case VEX_PREFIX_F3:
1124  attrMask |= ATTR_XS;
1125  break;
1126  case VEX_PREFIX_F2:
1127  attrMask |= ATTR_XD;
1128  break;
1129  }
1130 
1131  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1132  attrMask |= ATTR_VEXL;
1133  } else if (insn->vectorExtensionType == TYPE_XOP) {
1134  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1135  case VEX_PREFIX_66:
1136  attrMask |= ATTR_OPSIZE;
1137  break;
1138  case VEX_PREFIX_F3:
1139  attrMask |= ATTR_XS;
1140  break;
1141  case VEX_PREFIX_F2:
1142  attrMask |= ATTR_XD;
1143  break;
1144  }
1145 
1146  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1147  attrMask |= ATTR_VEXL;
1148  } else {
1149  return -1;
1150  }
1151  } else if (!insn->mandatoryPrefix) {
1152  // If we don't have mandatory prefix we should use legacy prefixes here
1153  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1154  attrMask |= ATTR_OPSIZE;
1155  if (insn->hasAdSize)
1156  attrMask |= ATTR_ADSIZE;
1157  if (insn->opcodeType == ONEBYTE) {
1158  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1159  // Special support for PAUSE
1160  attrMask |= ATTR_XS;
1161  } else {
1162  if (insn->repeatPrefix == 0xf2)
1163  attrMask |= ATTR_XD;
1164  else if (insn->repeatPrefix == 0xf3)
1165  attrMask |= ATTR_XS;
1166  }
1167  } else {
1168  switch (insn->mandatoryPrefix) {
1169  case 0xf2:
1170  attrMask |= ATTR_XD;
1171  break;
1172  case 0xf3:
1173  attrMask |= ATTR_XS;
1174  break;
1175  case 0x66:
1176  if (insn->mode != MODE_16BIT)
1177  attrMask |= ATTR_OPSIZE;
1178  break;
1179  case 0x67:
1180  attrMask |= ATTR_ADSIZE;
1181  break;
1182  }
1183  }
1184 
1185  if (insn->rexPrefix & 0x08) {
1186  attrMask |= ATTR_REXW;
1187  attrMask &= ~ATTR_ADSIZE;
1188  }
1189 
1190  if (insn->mode == MODE_16BIT) {
1191  // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1192  // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1193  if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1194  attrMask ^= ATTR_ADSIZE;
1195  // If we're in 16-bit mode and this is one of the relative jumps and opsize
1196  // prefix isn't present, we need to force the opsize attribute since the
1197  // prefix is inverted relative to 32-bit mode.
1198  if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1199  (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1200  attrMask |= ATTR_OPSIZE;
1201 
1202  if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1203  insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1204  attrMask |= ATTR_OPSIZE;
1205  }
1206 
1207 
1208  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1209  return -1;
1210 
1211  // The following clauses compensate for limitations of the tables.
1212 
1213  if (insn->mode != MODE_64BIT &&
1215  // The tables can't distinquish between cases where the W-bit is used to
1216  // select register size and cases where its a required part of the opcode.
1217  if ((insn->vectorExtensionType == TYPE_EVEX &&
1218  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1219  (insn->vectorExtensionType == TYPE_VEX_3B &&
1220  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1221  (insn->vectorExtensionType == TYPE_XOP &&
1222  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1223 
1224  uint16_t instructionIDWithREXW;
1225  if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1226  attrMask | ATTR_REXW)) {
1227  insn->instructionID = instructionID;
1228  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1229  return 0;
1230  }
1231 
1232  auto SpecName = mii->getName(instructionIDWithREXW);
1233  // If not a 64-bit instruction. Switch the opcode.
1234  if (!is64Bit(SpecName.data())) {
1235  insn->instructionID = instructionIDWithREXW;
1236  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1237  return 0;
1238  }
1239  }
1240  }
1241 
1242  // Absolute moves, umonitor, and movdir64b need special handling.
1243  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1244  // inverted w.r.t.
1245  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1246  // any position.
1247  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1248  (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1249  (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1250  // Make sure we observed the prefixes in any position.
1251  if (insn->hasAdSize)
1252  attrMask |= ATTR_ADSIZE;
1253  if (insn->hasOpSize)
1254  attrMask |= ATTR_OPSIZE;
1255 
1256  // In 16-bit, invert the attributes.
1257  if (insn->mode == MODE_16BIT) {
1258  attrMask ^= ATTR_ADSIZE;
1259 
1260  // The OpSize attribute is only valid with the absolute moves.
1261  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1262  attrMask ^= ATTR_OPSIZE;
1263  }
1264 
1265  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1266  return -1;
1267 
1268  insn->instructionID = instructionID;
1269  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1270  return 0;
1271  }
1272 
1273  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1274  !(attrMask & ATTR_OPSIZE)) {
1275  // The instruction tables make no distinction between instructions that
1276  // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1277  // particular spot (i.e., many MMX operations). In general we're
1278  // conservative, but in the specific case where OpSize is present but not in
1279  // the right place we check if there's a 16-bit operation.
1280  const struct InstructionSpecifier *spec;
1281  uint16_t instructionIDWithOpsize;
1282  llvm::StringRef specName, specWithOpSizeName;
1283 
1284  spec = &INSTRUCTIONS_SYM[instructionID];
1285 
1286  if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1287  attrMask | ATTR_OPSIZE)) {
1288  // ModRM required with OpSize but not present. Give up and return the
1289  // version without OpSize set.
1290  insn->instructionID = instructionID;
1291  insn->spec = spec;
1292  return 0;
1293  }
1294 
1295  specName = mii->getName(instructionID);
1296  specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1297 
1298  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1299  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1300  insn->instructionID = instructionIDWithOpsize;
1301  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1302  } else {
1303  insn->instructionID = instructionID;
1304  insn->spec = spec;
1305  }
1306  return 0;
1307  }
1308 
1309  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1310  insn->rexPrefix & 0x01) {
1311  // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1312  // as XCHG %r8, %eax.
1313  const struct InstructionSpecifier *spec;
1314  uint16_t instructionIDWithNewOpcode;
1315  const struct InstructionSpecifier *specWithNewOpcode;
1316 
1317  spec = &INSTRUCTIONS_SYM[instructionID];
1318 
1319  // Borrow opcode from one of the other XCHGar opcodes
1320  insn->opcode = 0x91;
1321 
1322  if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1323  attrMask)) {
1324  insn->opcode = 0x90;
1325 
1326  insn->instructionID = instructionID;
1327  insn->spec = spec;
1328  return 0;
1329  }
1330 
1331  specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1332 
1333  // Change back
1334  insn->opcode = 0x90;
1335 
1336  insn->instructionID = instructionIDWithNewOpcode;
1337  insn->spec = specWithNewOpcode;
1338 
1339  return 0;
1340  }
1341 
1342  insn->instructionID = instructionID;
1343  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1344 
1345  return 0;
1346 }
1347 
1348 // Read an operand from the opcode field of an instruction and interprets it
1349 // appropriately given the operand width. Handles AddRegFrm instructions.
1350 //
1351 // @param insn - the instruction whose opcode field is to be read.
1352 // @param size - The width (in bytes) of the register being specified.
1353 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1354 // RAX.
1355 // @return - 0 on success; nonzero otherwise.
1356 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1357  LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1358 
1359  if (size == 0)
1360  size = insn->registerSize;
1361 
1362  switch (size) {
1363  case 1:
1364  insn->opcodeRegister = (Reg)(
1365  MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1366  if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1367  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1368  insn->opcodeRegister =
1369  (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1370  }
1371 
1372  break;
1373  case 2:
1374  insn->opcodeRegister = (Reg)(
1375  MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1376  break;
1377  case 4:
1378  insn->opcodeRegister =
1379  (Reg)(MODRM_REG_EAX +
1380  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1381  break;
1382  case 8:
1383  insn->opcodeRegister =
1384  (Reg)(MODRM_REG_RAX +
1385  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1386  break;
1387  }
1388 
1389  return 0;
1390 }
1391 
1392 // Consume an immediate operand from an instruction, given the desired operand
1393 // size.
1394 //
1395 // @param insn - The instruction whose operand is to be read.
1396 // @param size - The width (in bytes) of the operand.
1397 // @return - 0 if the immediate was successfully consumed; nonzero
1398 // otherwise.
1399 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1400  uint8_t imm8;
1401  uint16_t imm16;
1402  uint32_t imm32;
1403  uint64_t imm64;
1404 
1405  LLVM_DEBUG(dbgs() << "readImmediate()");
1406 
1407  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1408 
1409  insn->immediateSize = size;
1410  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1411 
1412  switch (size) {
1413  case 1:
1414  if (consume(insn, imm8))
1415  return -1;
1416  insn->immediates[insn->numImmediatesConsumed] = imm8;
1417  break;
1418  case 2:
1419  if (consume(insn, imm16))
1420  return -1;
1421  insn->immediates[insn->numImmediatesConsumed] = imm16;
1422  break;
1423  case 4:
1424  if (consume(insn, imm32))
1425  return -1;
1426  insn->immediates[insn->numImmediatesConsumed] = imm32;
1427  break;
1428  case 8:
1429  if (consume(insn, imm64))
1430  return -1;
1431  insn->immediates[insn->numImmediatesConsumed] = imm64;
1432  break;
1433  default:
1434  llvm_unreachable("invalid size");
1435  }
1436 
1437  insn->numImmediatesConsumed++;
1438 
1439  return 0;
1440 }
1441 
1442 // Consume vvvv from an instruction if it has a VEX prefix.
1443 static int readVVVV(struct InternalInstruction *insn) {
1444  LLVM_DEBUG(dbgs() << "readVVVV()");
1445 
1446  int vvvv;
1447  if (insn->vectorExtensionType == TYPE_EVEX)
1448  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1450  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1451  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1452  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1453  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1454  else if (insn->vectorExtensionType == TYPE_XOP)
1455  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1456  else
1457  return -1;
1458 
1459  if (insn->mode != MODE_64BIT)
1460  vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1461 
1462  insn->vvvv = static_cast<Reg>(vvvv);
1463  return 0;
1464 }
1465 
1466 // Read an mask register from the opcode field of an instruction.
1467 //
1468 // @param insn - The instruction whose opcode field is to be read.
1469 // @return - 0 on success; nonzero otherwise.
1470 static int readMaskRegister(struct InternalInstruction *insn) {
1471  LLVM_DEBUG(dbgs() << "readMaskRegister()");
1472 
1473  if (insn->vectorExtensionType != TYPE_EVEX)
1474  return -1;
1475 
1476  insn->writemask =
1477  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1478  return 0;
1479 }
1480 
1481 // Consults the specifier for an instruction and consumes all
1482 // operands for that instruction, interpreting them as it goes.
1483 static int readOperands(struct InternalInstruction *insn) {
1484  int hasVVVV, needVVVV;
1485  int sawRegImm = 0;
1486 
1487  LLVM_DEBUG(dbgs() << "readOperands()");
1488 
1489  // If non-zero vvvv specified, make sure one of the operands uses it.
1490  hasVVVV = !readVVVV(insn);
1491  needVVVV = hasVVVV && (insn->vvvv != 0);
1492 
1493  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1494  switch (Op.encoding) {
1495  case ENCODING_NONE:
1496  case ENCODING_SI:
1497  case ENCODING_DI:
1498  break;
1500  // VSIB can use the V2 bit so check only the other bits.
1501  if (needVVVV)
1502  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1503  if (readModRM(insn))
1504  return -1;
1505 
1506  // Reject if SIB wasn't used.
1507  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1508  return -1;
1509 
1510  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1511  if (insn->sibIndex == SIB_INDEX_NONE)
1512  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1513 
1514  // If EVEX.v2 is set this is one of the 16-31 registers.
1515  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1517  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1518 
1519  // Adjust the index register to the correct size.
1520  switch ((OperandType)Op.type) {
1521  default:
1522  debug("Unhandled VSIB index type");
1523  return -1;
1524  case TYPE_MVSIBX:
1525  insn->sibIndex =
1526  (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1527  break;
1528  case TYPE_MVSIBY:
1529  insn->sibIndex =
1530  (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1531  break;
1532  case TYPE_MVSIBZ:
1533  insn->sibIndex =
1534  (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1535  break;
1536  }
1537 
1538  // Apply the AVX512 compressed displacement scaling factor.
1539  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1540  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1541  break;
1542  case ENCODING_SIB:
1543  // Reject if SIB wasn't used.
1544  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1545  return -1;
1546  if (readModRM(insn))
1547  return -1;
1548  if (fixupReg(insn, &Op))
1549  return -1;
1550  break;
1551  case ENCODING_REG:
1553  if (readModRM(insn))
1554  return -1;
1555  if (fixupReg(insn, &Op))
1556  return -1;
1557  // Apply the AVX512 compressed displacement scaling factor.
1558  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1559  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1560  break;
1561  case ENCODING_IB:
1562  if (sawRegImm) {
1563  // Saw a register immediate so don't read again and instead split the
1564  // previous immediate. FIXME: This is a hack.
1565  insn->immediates[insn->numImmediatesConsumed] =
1566  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1567  ++insn->numImmediatesConsumed;
1568  break;
1569  }
1570  if (readImmediate(insn, 1))
1571  return -1;
1572  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1573  sawRegImm = 1;
1574  break;
1575  case ENCODING_IW:
1576  if (readImmediate(insn, 2))
1577  return -1;
1578  break;
1579  case ENCODING_ID:
1580  if (readImmediate(insn, 4))
1581  return -1;
1582  break;
1583  case ENCODING_IO:
1584  if (readImmediate(insn, 8))
1585  return -1;
1586  break;
1587  case ENCODING_Iv:
1588  if (readImmediate(insn, insn->immediateSize))
1589  return -1;
1590  break;
1591  case ENCODING_Ia:
1592  if (readImmediate(insn, insn->addressSize))
1593  return -1;
1594  break;
1595  case ENCODING_IRC:
1596  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1598  break;
1599  case ENCODING_RB:
1600  if (readOpcodeRegister(insn, 1))
1601  return -1;
1602  break;
1603  case ENCODING_RW:
1604  if (readOpcodeRegister(insn, 2))
1605  return -1;
1606  break;
1607  case ENCODING_RD:
1608  if (readOpcodeRegister(insn, 4))
1609  return -1;
1610  break;
1611  case ENCODING_RO:
1612  if (readOpcodeRegister(insn, 8))
1613  return -1;
1614  break;
1615  case ENCODING_Rv:
1616  if (readOpcodeRegister(insn, 0))
1617  return -1;
1618  break;
1619  case ENCODING_CC:
1620  insn->immediates[1] = insn->opcode & 0xf;
1621  break;
1622  case ENCODING_FP:
1623  break;
1624  case ENCODING_VVVV:
1625  needVVVV = 0; // Mark that we have found a VVVV operand.
1626  if (!hasVVVV)
1627  return -1;
1628  if (insn->mode != MODE_64BIT)
1629  insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1630  if (fixupReg(insn, &Op))
1631  return -1;
1632  break;
1633  case ENCODING_WRITEMASK:
1634  if (readMaskRegister(insn))
1635  return -1;
1636  break;
1637  case ENCODING_DUP:
1638  break;
1639  default:
1640  LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1641  return -1;
1642  }
1643  }
1644 
1645  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1646  if (needVVVV)
1647  return -1;
1648 
1649  return 0;
1650 }
1651 
namespace llvm {

// Placeholder register numbers. They exist only so that an automatically
// generated switch statement compiles; nothing ever assigns these values.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI, // 501
    BP_SI, // 502
    BP_DI, // 503
    sib,   // 504
    sib64  // 505
  };
} // namespace X86

} // namespace llvm
1669 
// Forward declaration of the step that turns a decoded InternalInstruction
// into an MCInst. It is called from X86GenericDisassembler::getInstruction()
// below, which treats a false return value as success.
1670 static bool translateInstruction(MCInst &target,
1671  InternalInstruction &source,
1672  const MCDisassembler *Dis);
1673 
1674 namespace {
1675 
1676 /// Generic disassembler for all X86 platforms. All each platform class should
1677 /// have to do is subclass the constructor, and provide a different
1678 /// disassemblerMode value.
1679 class X86GenericDisassembler : public MCDisassembler {
1680  std::unique_ptr<const MCInstrInfo> MII;
1681 public:
1682  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1683  std::unique_ptr<const MCInstrInfo> MII);
1684 public:
1685  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1686  ArrayRef<uint8_t> Bytes, uint64_t Address,
1687  raw_ostream &cStream) const override;
1688 
1689 private:
1690  DisassemblerMode fMode;
1691 };
1692 
1693 } // namespace
1694 
1695 X86GenericDisassembler::X86GenericDisassembler(
1696  const MCSubtargetInfo &STI,
1697  MCContext &Ctx,
1698  std::unique_ptr<const MCInstrInfo> MII)
1699  : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1700  const FeatureBitset &FB = STI.getFeatureBits();
1701  if (FB[X86::Mode16Bit]) {
1702  fMode = MODE_16BIT;
1703  return;
1704  } else if (FB[X86::Mode32Bit]) {
1705  fMode = MODE_32BIT;
1706  return;
1707  } else if (FB[X86::Mode64Bit]) {
1708  fMode = MODE_64BIT;
1709  return;
1710  }
1711 
1712  llvm_unreachable("Invalid CPU mode");
1713 }
1714 
1715 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1716  MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1717  raw_ostream &CStream) const {
1718  CommentStream = &CStream;
1719 
1720  InternalInstruction Insn;
1721  memset(&Insn, 0, sizeof(InternalInstruction));
1722  Insn.bytes = Bytes;
1723  Insn.startLocation = Address;
1724  Insn.readerCursor = Address;
1725  Insn.mode = fMode;
1726 
1727  if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1728  getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1729  readOperands(&Insn)) {
1730  Size = Insn.readerCursor - Address;
1731  return Fail;
1732  }
1733 
1734  Insn.operands = x86OperandSets[Insn.spec->operands];
1735  Insn.length = Insn.readerCursor - Insn.startLocation;
1736  Size = Insn.length;
1737  if (Size > 15)
1738  LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1739 
1740  bool Ret = translateInstruction(Instr, Insn, this);
1741  if (!Ret) {
1742  unsigned Flags = X86::IP_NO_PREFIX;
1743  if (Insn.hasAdSize)
1744  Flags |= X86::IP_HAS_AD_SIZE;
1745  if (!Insn.mandatoryPrefix) {
1746  if (Insn.hasOpSize)
1747  Flags |= X86::IP_HAS_OP_SIZE;
1748  if (Insn.repeatPrefix == 0xf2)
1749  Flags |= X86::IP_HAS_REPEAT_NE;
1750  else if (Insn.repeatPrefix == 0xf3 &&
1751  // It should not be 'pause' f3 90
1752  Insn.opcode != 0x90)
1753  Flags |= X86::IP_HAS_REPEAT;
1754  if (Insn.hasLockPrefix)
1755  Flags |= X86::IP_HAS_LOCK;
1756  }
1757  Instr.setFlags(Flags);
1758  }
1759  return (!Ret) ? Success : Fail;
1760 }
1761 
1762 //
1763 // Private code that translates from struct InternalInstructions to MCInsts.
1764 //
1765 
1766 /// translateRegister - Translates an internal register to the appropriate LLVM
1767 /// register, and appends it as an operand to an MCInst.
1768 ///
1769 /// @param mcInst - The MCInst to append to.
1770 /// @param reg - The Reg to append.
1771 static void translateRegister(MCInst &mcInst, Reg reg) {
1772 #define ENTRY(x) X86::x,
1773  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1774 #undef ENTRY
1775 
1776  MCPhysReg llvmRegnum = llvmRegnums[reg];
1777  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1778 }
1779 
1780 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
1781 /// immediate Value in the MCInst.
1782 ///
1783 /// @param Value - The immediate Value, has had any PC adjustment made by
1784 /// the caller.
1785 /// @param isBranch - If the instruction is a branch instruction
1786 /// @param Address - The starting address of the instruction
1787 /// @param Offset - The byte offset to this immediate in the instruction
1788 /// @param Width - The byte width of this immediate in the instruction
1789 ///
1790 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
1791 /// called then that function is called to get any symbolic information for the
1792 /// immediate in the instruction using the Address, Offset and Width. If that
1793 /// returns non-zero then the symbolic information it returns is used to create
1794 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
1795 /// returns zero and isBranch is true then a symbol look up for immediate Value
1796 /// is done and if a symbol is found an MCExpr is created with that, else
1797 /// an MCExpr with the immediate Value is created. This function returns true
1798 /// if it adds an operand to the MCInst and false otherwise.
1799 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
1800  uint64_t Address, uint64_t Offset,
1801  uint64_t Width, MCInst &MI,
1802  const MCDisassembler *Dis) {
1804  Offset, Width);
1805 }
1806 
1807 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
1808 /// referenced by a load instruction with the base register that is the rip.
1809 /// These can often be addresses in a literal pool. The Address of the
1810 /// instruction and its immediate Value are used to determine the address
1811 /// being referenced in the literal pool entry. The SymbolLookUp call back will
1812 /// return a pointer to a literal 'C' string if the referenced address is an
1813 /// address into a section with 'C' string literals.
1814 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
1815  const void *Decoder) {
1816  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
1818 }
1819 
// Table of LLVM segment register numbers with SEG_OVERRIDE_max entries.
// Entry 0 corresponds to SEG_OVERRIDE_NONE and holds 0 (no register); the
// remaining entries are CS, SS, DS, ES, FS and GS, in that order.
// NOTE(review): presumably indexed by the instruction's segment-override
// value; the indexing statements are not visible in this listing.
1820 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1821  0, // SEG_OVERRIDE_NONE
1822  X86::CS,
1823  X86::SS,
1824  X86::DS,
1825  X86::ES,
1826  X86::FS,
1827  X86::GS
1828 };
1829 
1830 /// translateSrcIndex - Appends a source index operand to an MCInst.
1831 ///
1832 /// @param mcInst - The MCInst to append to.
1833 /// @param insn - The internal instruction.
1834 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1835  unsigned baseRegNo;
1836 
1837  if (insn.mode == MODE_64BIT)
1838  baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1839  else if (insn.mode == MODE_32BIT)
1840  baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1841  else {
1842  assert(insn.mode == MODE_16BIT);
1843  baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1844  }
1845  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1846  mcInst.addOperand(baseReg);
1847 
1848  MCOperand segmentReg;
1850  mcInst.addOperand(segmentReg);
1851  return false;
1852 }
1853 
1854 /// translateDstIndex - Appends a destination index operand to an MCInst.
1855 ///
1856 /// @param mcInst - The MCInst to append to.
1857 /// @param insn - The internal instruction.
1858 
1859 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1860  unsigned baseRegNo;
1861 
1862  if (insn.mode == MODE_64BIT)
1863  baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1864  else if (insn.mode == MODE_32BIT)
1865  baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1866  else {
1867  assert(insn.mode == MODE_16BIT);
1868  baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1869  }
1870  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1871  mcInst.addOperand(baseReg);
1872  return false;
1873 }
1874 
1875 /// translateImmediate - Appends an immediate operand to an MCInst.
1876 ///
1877 /// @param mcInst - The MCInst to append to.
1878 /// @param immediate - The immediate value to append.
1879 /// @param operand - The operand, as stored in the descriptor table.
1880 /// @param insn - The internal instruction.
1881 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1882  const OperandSpecifier &operand,
1883  InternalInstruction &insn,
1884  const MCDisassembler *Dis) {
1885  // Sign-extend the immediate if necessary.
1886 
1887  OperandType type = (OperandType)operand.type;
1888 
1889  bool isBranch = false;
1890  uint64_t pcrel = 0;
1891  if (type == TYPE_REL) {
1892  isBranch = true;
1893  pcrel = insn.startLocation +
1894  insn.immediateOffset + insn.immediateSize;
1895  switch (operand.encoding) {
1896  default:
1897  break;
1898  case ENCODING_Iv:
1899  switch (insn.displacementSize) {
1900  default:
1901  break;
1902  case 1:
1903  if(immediate & 0x80)
1904  immediate |= ~(0xffull);
1905  break;
1906  case 2:
1907  if(immediate & 0x8000)
1908  immediate |= ~(0xffffull);
1909  break;
1910  case 4:
1911  if(immediate & 0x80000000)
1912  immediate |= ~(0xffffffffull);
1913  break;
1914  case 8:
1915  break;
1916  }
1917  break;
1918  case ENCODING_IB:
1919  if(immediate & 0x80)
1920  immediate |= ~(0xffull);
1921  break;
1922  case ENCODING_IW:
1923  if(immediate & 0x8000)
1924  immediate |= ~(0xffffull);
1925  break;
1926  case ENCODING_ID:
1927  if(immediate & 0x80000000)
1928  immediate |= ~(0xffffffffull);
1929  break;
1930  }
1931  }
1932  // By default sign-extend all X86 immediates based on their encoding.
1933  else if (type == TYPE_IMM) {
1934  switch (operand.encoding) {
1935  default:
1936  break;
1937  case ENCODING_IB:
1938  if(immediate & 0x80)
1939  immediate |= ~(0xffull);
1940  break;
1941  case ENCODING_IW:
1942  if(immediate & 0x8000)
1943  immediate |= ~(0xffffull);
1944  break;
1945  case ENCODING_ID:
1946  if(immediate & 0x80000000)
1947  immediate |= ~(0xffffffffull);
1948  break;
1949  case ENCODING_IO:
1950  break;
1951  }
1952  }
1953 
1954  switch (type) {
1955  case TYPE_XMM:
1956  mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
1957  return;
1958  case TYPE_YMM:
1959  mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
1960  return;
1961  case TYPE_ZMM:
1962  mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
1963  return;
1964  default:
1965  // operand is 64 bits wide. Do nothing.
1966  break;
1967  }
1968 
1969  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
1970  insn.immediateOffset, insn.immediateSize,
1971  mcInst, Dis))
1972  mcInst.addOperand(MCOperand::createImm(immediate));
1973 
1974  if (type == TYPE_MOFFS) {
1975  MCOperand segmentReg;
1977  mcInst.addOperand(segmentReg);
1978  }
1979 }
1980 
1981 /// translateRMRegister - Translates a register stored in the R/M field of the
1982 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
1983 /// @param mcInst - The MCInst to append to.
1984 /// @param insn - The internal instruction to extract the R/M field
1985 /// from.
1986 /// @return - 0 on success; -1 otherwise
1987 static bool translateRMRegister(MCInst &mcInst,
1988  InternalInstruction &insn) {
1989  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
1990  debug("A R/M register operand may not have a SIB byte");
1991  return true;
1992  }
1993 
1994  switch (insn.eaBase) {
1995  default:
1996  debug("Unexpected EA base register");
1997  return true;
1998  case EA_BASE_NONE:
1999  debug("EA_BASE_NONE for ModR/M base");
2000  return true;
2001 #define ENTRY(x) case EA_BASE_##x:
2002  ALL_EA_BASES
2003 #undef ENTRY
2004  debug("A R/M register operand may not have a base; "
2005  "the operand must be a register.");
2006  return true;
2007 #define ENTRY(x) \
2008  case EA_REG_##x: \
2009  mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2010  ALL_REGS
2011 #undef ENTRY
2012  }
2013 
2014  return false;
2015 }
2016 
2017 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2018 /// fields of an internal instruction (and possibly its SIB byte) to a memory
2019 /// operand in LLVM's format, and appends it to an MCInst.
2020 ///
2021 /// @param mcInst - The MCInst to append to.
2022 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2023 /// from.
/// @param Dis - The disassembler handed to tryAddingSymbolicOperand when the
/// displacement operand is appended.
2024 /// @param ForceSIB - The instruction must use SIB.
2025 /// @return - false on success; true otherwise
2026 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2027  const MCDisassembler *Dis,
2028  bool ForceSIB = false) {
2029  // Addresses in an MCInst are represented as five operands:
2030  // 1. basereg (register) The R/M base, or (if there is a SIB) the
2031  // SIB base
2032  // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2033  // scale amount
2034  // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2035  // the index (which is multiplied by the
2036  // scale amount)
2037  // 4. displacement (immediate) 0, or the displacement if there is one
2038  // 5. segmentreg (register) x86_registerNONE for now, but could be set
2039  // if we have segment overrides
2040 
2041  MCOperand baseReg;
2042  MCOperand scaleAmount;
2043  MCOperand indexReg;
2044  MCOperand displacement;
2045  MCOperand segmentReg;
 // Added to the displacement when a symbolic operand is looked up at the end;
 // stays 0 unless the EA_BASE_NONE / MODE_64BIT path below sets it.
2046  uint64_t pcrel = 0;
2047 
 // SIB form: base, index and scale all come from the decoded SIB fields.
2048  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2049  if (insn.sibBase != SIB_BASE_NONE) {
2050  switch (insn.sibBase) {
2051  default:
2052  debug("Unexpected sibBase");
2053  return true;
2054 #define ENTRY(x) \
2055  case SIB_BASE_##x: \
2056  baseReg = MCOperand::createReg(X86::x); break;
 // NOTE(review): listing line 2057 is missing from this copy, so ENTRY is
 // defined and undefined without being expanded. The cross-reference list at
 // the end of the page mentions ALL_SIB_BASES -- presumably that is the
 // dropped line. Confirm against the upstream file.
2058 #undef ENTRY
2059  }
2060  } else {
2061  baseReg = MCOperand::createReg(X86::NoRegister);
2062  }
2063 
2064  if (insn.sibIndex != SIB_INDEX_NONE) {
2065  switch (insn.sibIndex) {
2066  default:
2067  debug("Unexpected sibIndex");
2068  return true;
2069 #define ENTRY(x) \
2070  case SIB_INDEX_##x: \
2071  indexReg = MCOperand::createReg(X86::x); break;
 // NOTE(review): listing lines 2072-2073 are missing from this copy. The
 // cross-reference list mentions EA_BASES_32BIT and EA_BASES_64BIT --
 // presumably those expansions precede the vector register lists below.
 // Confirm against the upstream file.
2074  REGS_XMM
2075  REGS_YMM
2076  REGS_ZMM
2077 #undef ENTRY
2078  }
2079  } else {
2080  // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2081  // but no index is used and modrm alone should have been enough.
2082  // -No base register in 32-bit mode. In 64-bit mode this is used to
2083  // avoid rip-relative addressing.
2084  // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2085  // base always requires a SIB byte.
2086  // -A scale other than 1 is used.
2087  if (!ForceSIB &&
2088  (insn.sibScale != 1 ||
2089  (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2090  (insn.sibBase != SIB_BASE_NONE &&
2091  insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2092  insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2093  indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2094  X86::RIZ);
2095  } else
2096  indexReg = MCOperand::createReg(X86::NoRegister);
2097  }
2098 
2099  scaleAmount = MCOperand::createImm(insn.sibScale);
2100  } else {
 // Non-SIB form: the base (and, for the BX/BP + SI/DI pairs, the index)
 // comes from eaBase, and the scale is fixed at 1 further down.
2101  switch (insn.eaBase) {
2102  case EA_BASE_NONE:
2103  if (insn.eaDisplacement == EA_DISP_NONE) {
2104  debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2105  return true;
2106  }
2107  if (insn.mode == MODE_64BIT){
2108  pcrel = insn.startLocation +
 // NOTE(review): listing lines 2109-2110 are missing from this copy: the
 // rest of the pcrel expression and the start of a call whose remaining
 // arguments follow. The cross-reference list mentions
 // tryAddingPcLoadReferenceComment(Value, Address) -- presumably that is the
 // callee. Confirm against the upstream file.
2111  insn.displacementOffset,
2112  insn.displacement + pcrel, Dis);
2113  // Section 2.2.1.6
2114  baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2115  X86::RIP);
2116  }
2117  else
2118  baseReg = MCOperand::createReg(X86::NoRegister);
2119 
2120  indexReg = MCOperand::createReg(X86::NoRegister);
2121  break;
2122  case EA_BASE_BX_SI:
2123  baseReg = MCOperand::createReg(X86::BX);
2124  indexReg = MCOperand::createReg(X86::SI);
2125  break;
2126  case EA_BASE_BX_DI:
2127  baseReg = MCOperand::createReg(X86::BX);
2128  indexReg = MCOperand::createReg(X86::DI);
2129  break;
2130  case EA_BASE_BP_SI:
2131  baseReg = MCOperand::createReg(X86::BP);
2132  indexReg = MCOperand::createReg(X86::SI);
2133  break;
2134  case EA_BASE_BP_DI:
2135  baseReg = MCOperand::createReg(X86::BP);
2136  indexReg = MCOperand::createReg(X86::DI);
2137  break;
2138  default:
2139  indexReg = MCOperand::createReg(X86::NoRegister);
2140  switch (insn.eaBase) {
2141  default:
2142  debug("Unexpected eaBase");
2143  return true;
2144  // Here, we will use the fill-ins defined above. However,
2145  // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2146  // sib and sib64 were handled in the top-level if, so they're only
2147  // placeholders to keep the compiler happy.
2148 #define ENTRY(x) \
2149  case EA_BASE_##x: \
2150  baseReg = MCOperand::createReg(X86::x); break;
2151  ALL_EA_BASES
2152 #undef ENTRY
2153 #define ENTRY(x) case EA_REG_##x:
2154  ALL_REGS
2155 #undef ENTRY
2156  debug("A R/M memory operand may not be a register; "
2157  "the base field must be a base.");
2158  return true;
2159  }
2160  }
2161 
2162  scaleAmount = MCOperand::createImm(1);
2163  }
2164 
2165  displacement = MCOperand::createImm(insn.displacement);
2166 
 // NOTE(review): listing line 2167 is missing from this copy. In the visible
 // code segmentReg is default-constructed and never assigned before it is
 // appended below; presumably the dropped line sets it from the instruction's
 // segment override. Confirm against the upstream file.
2168 
 // Append the five address operands in the order described at the top of the
 // function. The plain displacement immediate is only added when
 // tryAddingSymbolicOperand returns false.
2169  mcInst.addOperand(baseReg);
2170  mcInst.addOperand(scaleAmount);
2171  mcInst.addOperand(indexReg);
2172  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
2173  insn.startLocation, insn.displacementOffset,
2174  insn.displacementSize, mcInst, Dis))
2175  mcInst.addOperand(displacement);
2176  mcInst.addOperand(segmentReg);
2177  return false;
2178 }
2179 
2180 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2181 /// byte of an instruction to LLVM form, and appends it to an MCInst.
2182 ///
2183 /// @param mcInst - The MCInst to append to.
2184 /// @param operand - The operand, as stored in the descriptor table.
2185 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2186 /// from.
2187 /// @return - 0 on success; nonzero otherwise
2188 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2189  InternalInstruction &insn, const MCDisassembler *Dis) {
2190  switch (operand.type) {
2191  default:
2192  debug("Unexpected type for a R/M operand");
2193  return true;
2194  case TYPE_R8:
2195  case TYPE_R16:
2196  case TYPE_R32:
2197  case TYPE_R64:
2198  case TYPE_Rv:
2199  case TYPE_MM64:
2200  case TYPE_XMM:
2201  case TYPE_YMM:
2202  case TYPE_ZMM:
2203  case TYPE_TMM:
2204  case TYPE_VK_PAIR:
2205  case TYPE_VK:
2206  case TYPE_DEBUGREG:
2207  case TYPE_CONTROLREG:
2208  case TYPE_BNDR:
2209  return translateRMRegister(mcInst, insn);
2210  case TYPE_M:
2211  case TYPE_MVSIBX:
2212  case TYPE_MVSIBY:
2213  case TYPE_MVSIBZ:
2214  return translateRMMemory(mcInst, insn, Dis);
2215  case TYPE_MSIB:
2216  return translateRMMemory(mcInst, insn, Dis, true);
2217  }
2218 }
2219 
2220 /// translateFPRegister - Translates a stack position on the FPU stack to its
2221 /// LLVM form, and appends it to an MCInst.
2222 ///
2223 /// @param mcInst - The MCInst to append to.
2224 /// @param stackPos - The stack position to translate.
2225 static void translateFPRegister(MCInst &mcInst,
2226  uint8_t stackPos) {
2227  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2228 }
2229 
2230 /// translateMaskRegister - Translates a 3-bit mask register number to
2231 /// LLVM form, and appends it to an MCInst.
2232 ///
2233 /// @param mcInst - The MCInst to append to.
2234 /// @param maskRegNum - Number of mask register from 0 to 7.
2235 /// @return - false on success; true otherwise.
2236 static bool translateMaskRegister(MCInst &mcInst,
2237  uint8_t maskRegNum) {
2238  if (maskRegNum >= 8) {
2239  debug("Invalid mask register number");
2240  return true;
2241  }
2242 
2243  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2244  return false;
2245 }
2246 
2247 /// translateOperand - Translates an operand stored in an internal instruction
2248 /// to LLVM's format and appends it to an MCInst.
2249 ///
/// The operand's encoding selects which translator is used; encodings with no
/// case below are reported through debug() and treated as failure.
///
2250 /// @param mcInst - The MCInst to append to.
2251 /// @param operand - The operand, as stored in the descriptor table.
2252 /// @param insn - The internal instruction.
/// @param Dis - The disassembler, forwarded to the R/M and immediate
/// translators.
2253 /// @return - false on success; true otherwise.
2254 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2255  InternalInstruction &insn,
2256  const MCDisassembler *Dis) {
2257  switch (operand.encoding) {
2258  default:
2259  debug("Unhandled operand encoding during translation");
2260  return true;
2261  case ENCODING_REG:
2262  translateRegister(mcInst, insn.reg);
2263  return false;
2264  case ENCODING_WRITEMASK:
2265  return translateMaskRegister(mcInst, insn.writemask);
2266  case ENCODING_SIB:
 // NOTE(review): listing lines 2267-2268 are missing from this copy. The
 // cross-reference list at the end of the page mentions CASE_ENCODING_RM --
 // presumably further case-label macros sat here and share the translateRM
 // call below. Confirm against the upstream file.
2269  return translateRM(mcInst, operand, insn, Dis);
2270  case ENCODING_IB:
2271  case ENCODING_IW:
2272  case ENCODING_ID:
2273  case ENCODING_IO:
2274  case ENCODING_Iv:
2275  case ENCODING_Ia:
 // Immediates are consumed in order: each immediate operand takes the next
 // slot of insn.immediates and advances numImmediatesTranslated.
2276  translateImmediate(mcInst,
2277  insn.immediates[insn.numImmediatesTranslated++],
2278  operand,
2279  insn,
2280  Dis);
2281  return false;
2282  case ENCODING_IRC:
2283  mcInst.addOperand(MCOperand::createImm(insn.RC));
2284  return false;
2285  case ENCODING_SI:
2286  return translateSrcIndex(mcInst, insn);
2287  case ENCODING_DI:
2288  return translateDstIndex(mcInst, insn);
2289  case ENCODING_RB:
2290  case ENCODING_RW:
2291  case ENCODING_RD:
2292  case ENCODING_RO:
2293  case ENCODING_Rv:
2294  translateRegister(mcInst, insn.opcodeRegister);
2295  return false;
2296  case ENCODING_CC:
 // Reads the second immediate slot directly and does not advance
 // numImmediatesTranslated.
2297  mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2298  return false;
2299  case ENCODING_FP:
 // The stack position is the low three bits of the ModR/M byte.
2300  translateFPRegister(mcInst, insn.modRM & 7);
2301  return false;
2302  case ENCODING_VVVV:
2303  translateRegister(mcInst, insn.vvvv);
2304  return false;
2305  case ENCODING_DUP:
 // Re-translates an earlier operand of the same instruction; its index is
 // operand.type - TYPE_DUP0.
2306  return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2307  insn, Dis);
2308  }
2309 }
2310 
2311 /// translateInstruction - Translates an internal instruction and all its
2312 /// operands to an MCInst.
2313 ///
2314 /// @param mcInst - The MCInst to populate with the instruction's data.
2315 /// @param insn - The internal instruction.
2316 /// @return - false on success; true otherwise.
2317 static bool translateInstruction(MCInst &mcInst,
2318  InternalInstruction &insn,
2319  const MCDisassembler *Dis) {
2320  if (!insn.spec) {
2321  debug("Instruction has no specification");
2322  return true;
2323  }
2324 
2325  mcInst.clear();
2326  mcInst.setOpcode(insn.instructionID);
2327  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2328  // prefix bytes should be disassembled as xrelease and xacquire then set the
2329  // opcode to those instead of the rep and repne opcodes.
2330  if (insn.xAcquireRelease) {
2331  if(mcInst.getOpcode() == X86::REP_PREFIX)
2332  mcInst.setOpcode(X86::XRELEASE_PREFIX);
2333  else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2334  mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2335  }
2336 
2337  insn.numImmediatesTranslated = 0;
2338 
2339  for (const auto &Op : insn.operands) {
2340  if (Op.encoding != ENCODING_NONE) {
2341  if (translateOperand(mcInst, Op, insn, Dis)) {
2342  return true;
2343  }
2344  }
2345  }
2346 
2347  return false;
2348 }
2349 
2351  const MCSubtargetInfo &STI,
2352  MCContext &Ctx) {
2353  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2354  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2355 }
2356 
// NOTE(review): this definition is damaged in this copy. The function header
// (listing line 2357) and the statements on listing lines 2359-2362 are
// missing. The cross-reference list at the end of the page records the header
// as `LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()` and also
// mentions TargetRegistry::RegisterMCDisassembler, getTheX86_32Target and
// getTheX86_64Target -- presumably the dropped body registers
// createX86Disassembler for both targets. Confirm against the upstream file.
2358  // Register the disassembler.
2363 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::X86Disassembler::MODE_16BIT
@ MODE_16BIT
Definition: X86DisassemblerDecoderCommon.h:462
i
i
Definition: README.txt:29
byte
SSE Variable shift can be custom lowered to something like which uses a small table unaligned load shuffle instead of going through memory byte
Definition: README-SSE.txt:11
llvm::X86Disassembler::InternalInstruction::xAcquireRelease
bool xAcquireRelease
Definition: X86DisassemblerDecoder.h:552
vvvvFromVEX3of3
#define vvvvFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:56
llvm::X86Disassembler::InternalInstruction::vectorExtensionType
VectorExtensionType vectorExtensionType
Definition: X86DisassemblerDecoder.h:546
bFromXOP2of3
#define bFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:67
baseFromSIB
#define baseFromSIB(sib)
Definition: X86DisassemblerDecoder.h:30
llvm::X86Disassembler::MODE_64BIT
@ MODE_64BIT
Definition: X86DisassemblerDecoderCommon.h:464
llvm::X86Disassembler::InternalInstruction::hasLockPrefix
bool hasLockPrefix
Definition: X86DisassemblerDecoder.h:559
llvm::X86Disassembler::InternalInstruction::displacement
int32_t displacement
Definition: X86DisassemblerDecoder.h:608
xFromXOP2of3
#define xFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:66
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm
Definition: AllocatorList.h:23
THREEDNOW_MAP_SYM
#define THREEDNOW_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:33
lFromVEX2of2
#define lFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:62
llvm::X86Disassembler::TYPE_NO_VEX_XOP
@ TYPE_NO_VEX_XOP
Definition: X86DisassemblerDecoder.h:510
wFromVEX3of3
#define wFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:55
rFromEVEX2of4
#define rFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:36
type
llvm::X86Disassembler::VEX_LOB_0F3A
@ VEX_LOB_0F3A
Definition: X86DisassemblerDecoder.h:492
llvm::X86Disassembler::SEG_OVERRIDE_GS
@ SEG_OVERRIDE_GS
Definition: X86DisassemblerDecoder.h:484
OpcodeDecision
Definition: X86Disassembler.cpp:108
OpcodeDecision::modRMDecisions
ModRMDecision modRMDecisions[256]
Definition: X86Disassembler.cpp:109
mmFromEVEX2of4
#define mmFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:40
ppFromXOP3of3
#define ppFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:72
MCDisassembler.h
debug
#define debug(s)
Definition: X86Disassembler.cpp:96
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:71
llvm::X86Disassembler::ATTR_REXW
@ ATTR_REXW
Definition: X86DisassemblerDecoderCommon.h:54
llvm::X86::IP_HAS_OP_SIZE
@ IP_HAS_OP_SIZE
Definition: X86BaseInfo.h:58
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1005
CASE_ENCODING_RM
#define CASE_ENCODING_RM
Definition: X86DisassemblerDecoderCommon.h:335
llvm::X86Disassembler::TYPE_EVEX
@ TYPE_EVEX
Definition: X86DisassemblerDecoder.h:513
llvm::X86Disassembler::InstructionSpecifier::operands
uint16_t operands
Definition: X86DisassemblerDecoder.h:520
llvm::X86Disassembler::SEG_OVERRIDE_SS
@ SEG_OVERRIDE_SS
Definition: X86DisassemblerDecoder.h:480
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:125
op
#define op(i)
translateImmediate
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
Definition: X86Disassembler.cpp:1881
isREX
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Definition: X86Disassembler.cpp:199
mmmmmFromVEX2of3
#define mmmmmFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:54
ModRMDecision::instructionIDs
uint16_t instructionIDs
Definition: X86Disassembler.cpp:103
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:469
ppFromVEX2of2
#define ppFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:63
llvm::X86AS::GS
@ GS
Definition: X86.h:187
llvm::X86Disassembler::VEX_LOB_0F38
@ VEX_LOB_0F38
Definition: X86DisassemblerDecoder.h:491
Fail
#define Fail
Definition: AArch64Disassembler.cpp:249
llvm::X86Disassembler::InternalInstruction::length
size_t length
Definition: X86DisassemblerDecoder.h:537
llvm::X86Disassembler::InternalInstruction::consumedModRM
bool consumedModRM
Definition: X86DisassemblerDecoder.h:601
llvm::X86Disassembler::InternalInstruction::segmentOverride
SegmentOverride segmentOverride
Definition: X86DisassemblerDecoder.h:550
llvm::X86Disassembler::SEG_OVERRIDE_FS
@ SEG_OVERRIDE_FS
Definition: X86DisassemblerDecoder.h:483
modFromModRM
#define modFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:25
scaleFromSIB
#define scaleFromSIB(sib)
Definition: X86DisassemblerDecoder.h:28
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
llvm::X86Disassembler::InternalInstruction::opcodeType
OpcodeType opcodeType
Definition: X86DisassemblerDecoder.h:582
readModRM
static int readModRM(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:601
translateFPRegister
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
Definition: X86Disassembler.cpp:2225
nextByte
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
Definition: WebAssemblyDisassembler.cpp:76
createX86Disassembler
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Definition: X86Disassembler.cpp:2350
llvm::X86Disassembler::EA_BASE_NONE
@ EA_BASE_NONE
Definition: X86DisassemblerDecoder.h:426
llvm::X86Disassembler::TYPE_XOP
@ TYPE_XOP
Definition: X86DisassemblerDecoder.h:514
llvm::X86Disassembler::InternalInstruction::sibIndex
SIBIndex sibIndex
Definition: X86DisassemblerDecoder.h:634
llvm::X86Disassembler::XOP_MAP_SELECT_8
@ XOP_MAP_SELECT_8
Definition: X86DisassemblerDecoder.h:496
llvm::TargetRegistry::RegisterMCDisassembler
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
Definition: TargetRegistry.h:868
rFromXOP2of3
#define rFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:65
T
#define T
Definition: Mips16ISelLowering.cpp:341
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::X86Disassembler::MODE_32BIT
@ MODE_32BIT
Definition: X86DisassemblerDecoderCommon.h:463
X86DisassemblerDecoder.h
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
rFromVEX2of3
#define rFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:51
ModRMDecision
Definition: X86Disassembler.cpp:101
translateSrcIndex
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Definition: X86Disassembler.cpp:1834
vvvvFromVEX2of2
#define vvvvFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:61
llvm::X86Disassembler::ATTR_EVEX
@ ATTR_EVEX
Definition: X86DisassemblerDecoderCommon.h:59
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
llvm::X86Disassembler::ATTR_EVEXK
@ ATTR_EVEXK
Definition: X86DisassemblerDecoderCommon.h:61
llvm::X86::BP_SI
@ BP_SI
Definition: X86Disassembler.cpp:1661
llvm::SIInstrFlags::DS
@ DS
Definition: SIDefines.h:52
fixupReg
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Definition: X86Disassembler.cpp:836
Format.h
ALL_SIB_BASES
#define ALL_SIB_BASES
Definition: X86DisassemblerDecoder.h:399
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
rFromVEX2of2
#define rFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:60
llvm::X86Disassembler::InternalInstruction::opcode
uint8_t opcode
Definition: X86DisassemblerDecoder.h:577
llvm::X86Disassembler::InternalInstruction::numImmediatesTranslated
uint8_t numImmediatesTranslated
Definition: X86DisassemblerDecoder.h:612
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
translateRMRegister
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
Definition: X86Disassembler.cpp:1987
peek
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Definition: X86Disassembler.cpp:178
llvm::X86Disassembler::ATTR_XS
@ ATTR_XS
Definition: X86DisassemblerDecoderCommon.h:52
bFromREX
#define bFromREX(rex)
Definition: X86DisassemblerDecoder.h:34
getInstructionIDWithAttrMask
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
Definition: X86Disassembler.cpp:1016
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::X86Disassembler::InternalInstruction::opcodeRegister
Reg opcodeRegister
Definition: X86DisassemblerDecoder.h:616
llvm::X86Disassembler::InstructionContext
InstructionContext
Definition: X86DisassemblerDecoderCommon.h:277
llvm::X86Disassembler::EA_DISP_32
@ EA_DISP_32
Definition: X86DisassemblerDecoder.h:465
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
xFromREX
#define xFromREX(rex)
Definition: X86DisassemblerDecoder.h:33
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
EA_BASES_64BIT
#define EA_BASES_64BIT
Definition: X86DisassemblerDecoder.h:169
llvm::X86Disassembler::SEG_OVERRIDE_CS
@ SEG_OVERRIDE_CS
Definition: X86DisassemblerDecoder.h:479
r2FromEVEX2of4
#define r2FromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:39
llvm::X86Disassembler::ATTR_EVEXL2
@ ATTR_EVEXL2
Definition: X86DisassemblerDecoderCommon.h:60
readSIB
static int readSIB(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:508
llvm::X86Disassembler::SIB_BASE_NONE
@ SIB_BASE_NONE
Definition: X86DisassemblerDecoder.h:453
llvm::X86Disassembler::XOP_MAP_SELECT_A
@ XOP_MAP_SELECT_A
Definition: X86DisassemblerDecoder.h:498
lFromVEX3of3
#define lFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:57
llvm::X86Disassembler::THREEBYTE_38
@ THREEBYTE_38
Definition: X86DisassemblerDecoderCommon.h:288
llvm::X86Disassembler::VEX_LOB_0F
@ VEX_LOB_0F
Definition: X86DisassemblerDecoder.h:490
wFromEVEX3of4
#define wFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:41
llvm::X86Disassembler::InternalInstruction
The x86 internal instruction, which is produced by the decoder.
Definition: X86DisassemblerDecoder.h:524
llvm::X86Disassembler::SEG_OVERRIDE_ES
@ SEG_OVERRIDE_ES
Definition: X86DisassemblerDecoder.h:482
llvm::X86::IP_HAS_AD_SIZE
@ IP_HAS_AD_SIZE
Definition: X86BaseInfo.h:59
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
llvm::X86Disassembler::InternalInstruction::rexPrefix
uint8_t rexPrefix
Definition: X86DisassemblerDecoder.h:548
llvm::X86Disassembler::EA_DISP_16
@ EA_DISP_16
Definition: X86DisassemblerDecoder.h:464
llvm::X86Disassembler::SIB_INDEX_NONE
@ SIB_INDEX_NONE
Definition: X86DisassemblerDecoder.h:441
MCContext.h
MCInstrInfo.h
llvm::X86Disassembler::InternalInstruction::hasAdSize
bool hasAdSize
Definition: X86DisassemblerDecoder.h:555
vvvvFromEVEX3of4
#define vvvvFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:42
XOP8_MAP_SYM
#define XOP8_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:30
llvm::X86Disassembler::InternalInstruction::vvvv
Reg vvvv
Definition: X86DisassemblerDecoder.h:594
MCInst.h
readImmediate
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1399
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
EA_BASES_32BIT
#define EA_BASES_32BIT
Definition: X86DisassemblerDecoder.h:133
MCSubtargetInfo.h
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:111
readOperands
static int readOperands(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1483
llvm::X86Disassembler::VEX_PREFIX_F2
@ VEX_PREFIX_F2
Definition: X86DisassemblerDecoder.h:506
llvm::X86Disassembler::InternalInstruction::startLocation
uint64_t startLocation
Definition: X86DisassemblerDecoder.h:535
ppFromVEX3of3
#define ppFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:58
llvm::X86::BP_DI
@ BP_DI
Definition: X86Disassembler.cpp:1662
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::X86Disassembler::XOPA_MAP
@ XOPA_MAP
Definition: X86DisassemblerDecoderCommon.h:292
llvm::X86Disassembler::SIBIndex
SIBIndex
All possible values of the SIB index field.
Definition: X86DisassemblerDecoder.h:440
llvm::X86Disassembler::OperandEncoding
OperandEncoding
Definition: X86DisassemblerDecoderCommon.h:398
llvm::X86Disassembler::EA_DISP_NONE
@ EA_DISP_NONE
Definition: X86DisassemblerDecoder.h:462
translateOperand
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
Definition: X86Disassembler.cpp:2254
bFromVEX2of3
#define bFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:53
wFromREX
#define wFromREX(rex)
Definition: X86DisassemblerDecoder.h:31
llvm::X86Disassembler::ATTR_VEX
@ ATTR_VEX
Definition: X86DisassemblerDecoderCommon.h:57
llvm::MCDisassembler::DecodeStatus
DecodeStatus
Ternary decode status.
Definition: MCDisassembler.h:100
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
ONEBYTE_SYM
#define ONEBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:26
llvm::X86Disassembler::InternalInstruction::displacementSize
uint8_t displacementSize
Definition: X86DisassemblerDecoder.h:566
llvm::X86Disassembler::IC_max
@ IC_max
Definition: X86DisassemblerDecoderCommon.h:279
llvm::X86Disassembler::SIBBase
SIBBase
All possible values of the SIB base field.
Definition: X86DisassemblerDecoder.h:452
GENERIC_FIXUP_FUNC
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
Definition: X86Disassembler.cpp:739
readMaskRegister
static int readMaskRegister(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1470
getInstructionID
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
Definition: X86Disassembler.cpp:1065
llvm::X86Disassembler::InternalInstruction::modRM
uint8_t modRM
Definition: X86DisassemblerDecoder.h:602
llvm::X86Disassembler::InternalInstruction::eaRegBase
EABase eaRegBase
Definition: X86DisassemblerDecoder.h:622
llvm::X86Disassembler::TYPE_VEX_2B
@ TYPE_VEX_2B
Definition: X86DisassemblerDecoder.h:511
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
lFromXOP3of3
#define lFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:71
regFromModRM
#define regFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:26
LLVMInitializeX86Disassembler
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
Definition: X86Disassembler.cpp:2357
X86MCTargetDesc.h
llvm::X86Disassembler::ATTR_OPSIZE
@ ATTR_OPSIZE
Definition: X86DisassemblerDecoderCommon.h:55
llvm::X86Disassembler::InternalInstruction::RC
uint8_t RC
Definition: X86DisassemblerDecoder.h:639
llvm::X86Disassembler::VEX_PREFIX_F3
@ VEX_PREFIX_F3
Definition: X86DisassemblerDecoder.h:505
llvm::X86Disassembler::InternalInstruction::bytes
llvm::ArrayRef< uint8_t > bytes
Definition: X86DisassemblerDecoder.h:526
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::X86Disassembler::ATTR_EVEXB
@ ATTR_EVEXB
Definition: X86DisassemblerDecoderCommon.h:63
llvm::X86Disassembler::ATTR_VEXL
@ ATTR_VEXL
Definition: X86DisassemblerDecoderCommon.h:58
tryAddingSymbolicOperand
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, const MCDisassembler *Dis)
tryAddingSymbolicOperand - trys to add a symbolic operand in place of the immediate Value in the MCIn...
Definition: X86Disassembler.cpp:1799
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::X86Disassembler::XOP8_MAP
@ XOP8_MAP
Definition: X86DisassemblerDecoderCommon.h:290
llvm::MCDisassembler::tryAddingPcLoadReferenceComment
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
Definition: MCDisassembler.cpp:36
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::MCDisassembler
Superclass for all disassemblers.
Definition: MCDisassembler.h:76
ContextDecision
Definition: X86Disassembler.cpp:117
llvm::X86Disassembler::InternalInstruction::writemask
Reg writemask
Definition: X86DisassemblerDecoder.h:597
vvvvFromXOP3of3
#define vvvvFromXOP3of3(vex)
Definition: X86DisassemblerDecoder.h:70
decode
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
Definition: X86Disassembler.cpp:123
llvm::X86Disassembler::XOP_MAP_SELECT_9
@ XOP_MAP_SELECT_9
Definition: X86DisassemblerDecoder.h:497
llvm::X86Disassembler::InternalInstruction::immediates
uint64_t immediates[2]
Definition: X86DisassemblerDecoder.h:613
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:20
llvm::X86Disassembler::InternalInstruction::eaBase
EABase eaBase
Definition: X86DisassemblerDecoder.h:627
mmmmmFromXOP2of3
#define mmmmmFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:68
llvm::X86Disassembler::EABase
EABase
All possible values of the base field for effective-address computations, a.k.a.
Definition: X86DisassemblerDecoder.h:425
llvm::MCOI::OperandType
OperandType
Operands are tagged with one of the values of this enum.
Definition: MCInstrDesc.h:56
llvm::X86Disassembler::InternalInstruction::reg
Reg reg
Definition: X86DisassemblerDecoder.h:630
llvm::X86Disassembler::InternalInstruction::sib
uint8_t sib
Definition: X86DisassemblerDecoder.h:605
llvm::X86Disassembler::InternalInstruction::immediateOffset
uint8_t immediateOffset
Definition: X86DisassemblerDecoder.h:572
llvm::X86Disassembler::ONEBYTE
@ ONEBYTE
Definition: X86DisassemblerDecoderCommon.h:286
llvm::X86::IP_NO_PREFIX
@ IP_NO_PREFIX
Definition: X86BaseInfo.h:57
llvm::HighlightColor::Address
@ Address
ppFromEVEX3of4
#define ppFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:43
rmFromModRM
#define rmFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:27
llvm::X86Disassembler::InternalInstruction::spec
const InstructionSpecifier * spec
Definition: X86DisassemblerDecoder.h:586
llvm::X86Disassembler::ATTR_ADSIZE
@ ATTR_ADSIZE
Definition: X86DisassemblerDecoderCommon.h:56
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
translateInstruction
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
Definition: X86Disassembler.cpp:2317
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
SI
StandardInstrumentations SI(Debug, VerifyEach)
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::X86Disassembler::InternalInstruction::instructionID
uint16_t instructionID
Definition: X86DisassemblerDecoder.h:584
llvm::X86::BX_SI
@ BX_SI
Definition: X86Disassembler.cpp:1659
llvm::X86Disassembler::ATTR_EVEXKZ
@ ATTR_EVEXKZ
Definition: X86DisassemblerDecoderCommon.h:62
isBranch
static bool isBranch(unsigned Opcode)
Definition: R600InstrInfo.cpp:646
llvm::X86Disassembler::OperandSpecifier::encoding
uint8_t encoding
Definition: X86DisassemblerDecoderCommon.h:452
llvm::MCDisassembler::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) const
Definition: MCDisassembler.cpp:26
llvm::MCInstrInfo::getName
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:68
llvm::X86Disassembler::InternalInstruction::addressSize
uint8_t addressSize
Definition: X86DisassemblerDecoder.h:565
llvm::X86Disassembler::InternalInstruction::numImmediatesConsumed
uint8_t numImmediatesConsumed
Definition: X86DisassemblerDecoder.h:611
zFromEVEX4of4
#define zFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:44
llvm::X86Disassembler::InternalInstruction::operands
ArrayRef< OperandSpecifier > operands
Definition: X86DisassemblerDecoder.h:641
llvm::X86Disassembler::XOP9_MAP
@ XOP9_MAP
Definition: X86DisassemblerDecoderCommon.h:291
llvm::X86Disassembler::InternalInstruction::displacementOffset
uint8_t displacementOffset
Definition: X86DisassemblerDecoder.h:571
llvm::X86Disassembler::OpcodeType
OpcodeType
Definition: X86DisassemblerDecoderCommon.h:285
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
llvm::HexagonMCInstrInfo::isPrefix
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
Definition: HexagonMCInstrInfo.cpp:724
llvm::ArrayRef< uint8_t >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
bFromEVEX2of4
#define bFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:38
llvm::X86Disassembler::InternalInstruction::regBase
Reg regBase
Definition: X86DisassemblerDecoder.h:623
readVVVV
static int readVVVV(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1443
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
uint32_t
translateDstIndex
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
Definition: X86Disassembler.cpp:1859
llvm::X86Disassembler::OperandSpecifier
The specification for how to extract and interpret one operand.
Definition: X86DisassemblerDecoderCommon.h:451
readOpcodeRegister
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1356
llvm::X86Disassembler::InternalInstruction::repeatPrefix
uint8_t repeatPrefix
Definition: X86DisassemblerDecoder.h:561
translateRM
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
Definition: X86Disassembler.cpp:2188
llvm::X86Disassembler::InternalInstruction::eaDisplacement
EADisplacement eaDisplacement
Definition: X86DisassemblerDecoder.h:628
rFromREX
#define rFromREX(rex)
Definition: X86DisassemblerDecoder.h:32
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::X86Disassembler::VEX_PREFIX_66
@ VEX_PREFIX_66
Definition: X86DisassemblerDecoder.h:504
tryAddingPcLoadReferenceComment
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder)
tryAddingPcLoadReferenceComment - tries to add a comment as to what is being referenced by a load inst...
Definition: X86Disassembler.cpp:1814
ContextDecision::opcodeDecisions
OpcodeDecision opcodeDecisions[IC_max]
Definition: X86Disassembler.cpp:118
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
readOpcode
static bool readOpcode(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:873
llvm::X86Disassembler::InternalInstruction::readerCursor
uint64_t readerCursor
Definition: X86DisassemblerDecoder.h:528
TWOBYTE_SYM
#define TWOBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:27
llvm::X86Disassembler::InternalInstruction::mode
DisassemblerMode mode
Definition: X86DisassemblerDecoder.h:533
llvm::X86Disassembler
Definition: X86DisassemblerDecoderCommon.h:22
v2FromEVEX4of4
#define v2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:48
aaaFromEVEX4of4
#define aaaFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:49
REGS_ZMM
#define REGS_ZMM
Definition: X86DisassemblerDecoder.h:283
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:84
readDisplacement
static int readDisplacement(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:570
ALL_REGS
#define ALL_REGS
Definition: X86DisassemblerDecoder.h:403
llvm::X86Disassembler::InternalInstruction::hasOpSize
bool hasOpSize
Definition: X86DisassemblerDecoder.h:557
llvm::X86Disassembler::ATTR_XD
@ ATTR_XD
Definition: X86DisassemblerDecoderCommon.h:53
llvm::MCInstrInfo
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:25
llvm::X86Disassembler::SEG_OVERRIDE_max
@ SEG_OVERRIDE_max
Definition: X86DisassemblerDecoder.h:485
REGS_YMM
#define REGS_YMM
Definition: X86DisassemblerDecoder.h:249
std
Definition: BitVector.h:838
XOPA_MAP_SYM
#define XOPA_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:32
translateRegister
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
Definition: X86Disassembler.cpp:1771
ALL_EA_BASES
#define ALL_EA_BASES
Definition: X86DisassemblerDecoder.h:394
uint16_t
translateRMMemory
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
Definition: X86Disassembler.cpp:2026
THREEBYTE38_SYM
#define THREEBYTE38_SYM
Definition: X86DisassemblerDecoderCommon.h:28
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
XOP9_MAP_SYM
#define XOP9_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:31
translateMaskRegister
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
Definition: X86Disassembler.cpp:2236
Success
#define Success
Definition: AArch64Disassembler.cpp:248
llvm::X86Disassembler::OperandSpecifier::type
uint8_t type
Definition: X86DisassemblerDecoderCommon.h:453
lFromEVEX4of4
#define lFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:46
llvm::X86::IP_HAS_REPEAT_NE
@ IP_HAS_REPEAT_NE
Definition: X86BaseInfo.h:60
llvm::MCInst::getOpcode
unsigned getOpcode() const
Definition: MCInst.h:198
llvm::X86Disassembler::TWOBYTE
@ TWOBYTE
Definition: X86DisassemblerDecoderCommon.h:287
llvm::X86Disassembler::TYPE_VEX_3B
@ TYPE_VEX_3B
Definition: X86DisassemblerDecoder.h:512
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
wFromXOP3of3
#define wFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:69
llvm::X86::sib
@ sib
Definition: X86Disassembler.cpp:1663
REGS_XMM
#define REGS_XMM
Definition: X86DisassemblerDecoder.h:215
THREEBYTE3A_SYM
#define THREEBYTE3A_SYM
Definition: X86DisassemblerDecoderCommon.h:29
llvm::X86Disassembler::EA_DISP_8
@ EA_DISP_8
Definition: X86DisassemblerDecoder.h:463
X86BaseInfo.h
llvm::X86Disassembler::DisassemblerMode
DisassemblerMode
Decoding mode for the Intel disassembler.
Definition: X86DisassemblerDecoderCommon.h:461
llvm::X86Disassembler::InternalInstruction::mandatoryPrefix
uint8_t mandatoryPrefix
Definition: X86DisassemblerDecoder.h:542
l2FromEVEX4of4
#define l2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:45
llvm::X86Disassembler::InternalInstruction::sibIndexBase
SIBIndex sibIndexBase
Definition: X86DisassemblerDecoder.h:633
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
INSTRUCTIONS_SYM
#define INSTRUCTIONS_SYM
Definition: X86DisassemblerDecoderCommon.h:24
llvm::X86Disassembler::InternalInstruction::sibScale
uint8_t sibScale
Definition: X86DisassemblerDecoder.h:635
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::X86AS::FS
@ FS
Definition: X86.h:188
is16BitEquivalent
static bool is16BitEquivalent(const char *orig, const char *equiv)
Definition: X86Disassembler.cpp:986
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:186
bFromEVEX4of4
#define bFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:47
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::X86::BX_DI
@ BX_DI
Definition: X86Disassembler.cpp:1660
CASE_ENCODING_VSIB
#define CASE_ENCODING_VSIB
Definition: X86DisassemblerDecoderCommon.h:344
xFromEVEX2of4
#define xFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:37
llvm::X86Disassembler::OperandType
OperandType
Definition: X86DisassemblerDecoderCommon.h:444
xFromVEX2of3
#define xFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:52
llvm::X86Disassembler::THREEDNOW_MAP
@ THREEDNOW_MAP
Definition: X86DisassemblerDecoderCommon.h:293
llvm::X86Disassembler::InstrUID
uint16_t InstrUID
Definition: X86DisassemblerDecoderCommon.h:303
llvm::X86Disassembler::SEG_OVERRIDE_DS
@ SEG_OVERRIDE_DS
Definition: X86DisassemblerDecoder.h:481
llvm::X86Disassembler::InstructionSpecifier
The specification for how to extract and interpret a full instruction and its operands.
Definition: X86DisassemblerDecoder.h:519
readPrefixes
static int readPrefixes(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:208
raw_ostream.h
llvm::X86Disassembler::InternalInstruction::registerSize
uint8_t registerSize
Definition: X86DisassemblerDecoder.h:564
indexFromSIB
#define indexFromSIB(sib)
Definition: X86DisassemblerDecoder.h:29
segmentRegnums
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Definition: X86Disassembler.cpp:1820
llvm::X86Disassembler::ATTR_64BIT
@ ATTR_64BIT
Definition: X86DisassemblerDecoderCommon.h:51
X86
Unrolling by would eliminate the &in both leading to a net reduction in code size The resultant code would then also be suitable for exit value computation We miss a bunch of rotate opportunities on various including etc On X86
Definition: README.txt:568
X86TargetInfo.h
TargetRegistry.h
ModRMDecision::modrm_type
uint8_t modrm_type
Definition: X86Disassembler.cpp:102
MCExpr.h
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::X86Disassembler::THREEBYTE_3A
@ THREEBYTE_3A
Definition: X86DisassemblerDecoderCommon.h:289
llvm::X86Disassembler::ATTR_NONE
@ ATTR_NONE
Definition: X86DisassemblerDecoderCommon.h:50
llvm::X86::sib64
@ sib64
Definition: X86Disassembler.cpp:1664
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::MCInst::clear
void clear()
Definition: MCInst.h:215
llvm::X86Disassembler::InternalInstruction::immediateSize
uint8_t immediateSize
Definition: X86DisassemblerDecoder.h:567
llvm::X86Disassembler::InternalInstruction::vectorExtensionPrefix
uint8_t vectorExtensionPrefix[4]
Definition: X86DisassemblerDecoder.h:544
llvm::X86Disassembler::InternalInstruction::sibBase
SIBBase sibBase
Definition: X86DisassemblerDecoder.h:636