LLVM  14.0.0git
X86Disassembler.cpp
Go to the documentation of this file.
1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32 // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52 // the main opcode. This is orthogonal from its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75 
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
86 #include "llvm/MC/TargetRegistry.h"
87 #include "llvm/Support/Debug.h"
88 #include "llvm/Support/Format.h"
90 
91 using namespace llvm;
92 using namespace llvm::X86Disassembler;
93 
94 #define DEBUG_TYPE "x86-disassembler"
95 
96 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97 
98 // Specifies whether a ModR/M byte is needed and (if so) which
99 // instruction each possible value of the ModR/M byte corresponds to. Once
100 // this information is known, we have narrowed down to a single instruction.
102  uint8_t modrm_type;
104 };
105 
106 // Specifies which set of ModR/M->instruction tables to look at
107 // given a particular opcode.
109  ModRMDecision modRMDecisions[256];
110 };
111 
112 // Specifies which opcode->instruction tables to look at given
113 // a particular context (set of attributes). Since there are many possible
114 // contexts, the decoder first uses CONTEXTS_SYM to determine which context
115 // applies given a specific set of attributes. Hence there are only IC_max
116 // entries in this table, rather than 2^(ATTR_max).
118  OpcodeDecision opcodeDecisions[IC_max];
119 };
120 
121 #include "X86GenDisassemblerTables.inc"
122 
124  uint8_t opcode, uint8_t modRM) {
125  const struct ModRMDecision *dec;
126 
127  switch (type) {
128  case ONEBYTE:
129  dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130  break;
131  case TWOBYTE:
132  dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133  break;
134  case THREEBYTE_38:
135  dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136  break;
137  case THREEBYTE_3A:
138  dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139  break;
140  case XOP8_MAP:
141  dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142  break;
143  case XOP9_MAP:
144  dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145  break;
146  case XOPA_MAP:
147  dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148  break;
149  case THREEDNOW_MAP:
150  dec =
151  &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152  break;
153  case MAP5:
154  dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155  break;
156  case MAP6:
157  dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158  break;
159  }
160 
161  switch (dec->modrm_type) {
162  default:
163  llvm_unreachable("Corrupt table! Unknown modrm_type");
164  return 0;
165  case MODRM_ONEENTRY:
166  return modRMTable[dec->instructionIDs];
167  case MODRM_SPLITRM:
168  if (modFromModRM(modRM) == 0x3)
169  return modRMTable[dec->instructionIDs + 1];
170  return modRMTable[dec->instructionIDs];
171  case MODRM_SPLITREG:
172  if (modFromModRM(modRM) == 0x3)
173  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
174  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
175  case MODRM_SPLITMISC:
176  if (modFromModRM(modRM) == 0x3)
177  return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
178  return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
179  case MODRM_FULL:
180  return modRMTable[dec->instructionIDs + modRM];
181  }
182 }
183 
184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
185  uint64_t offset = insn->readerCursor - insn->startLocation;
186  if (offset >= insn->bytes.size())
187  return true;
188  byte = insn->bytes[offset];
189  return false;
190 }
191 
192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
193  auto r = insn->bytes;
194  uint64_t offset = insn->readerCursor - insn->startLocation;
195  if (offset + sizeof(T) > r.size())
196  return true;
197  T ret = 0;
198  for (unsigned i = 0; i < sizeof(T); ++i)
199  ret |= (uint64_t)r[offset + i] << (i * 8);
200  ptr = ret;
201  insn->readerCursor += sizeof(T);
202  return false;
203 }
204 
205 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206  return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
207 }
208 
209 // Consumes all of an instruction's prefix bytes, and marks the
210 // instruction as having them. Also sets the instruction's default operand,
211 // address, and other relevant data sizes to report operands correctly.
212 //
213 // insn must not be empty.
//
// Returns 0 on success, or -1 when a byte that has to follow an already
// accepted prefix (REX, EVEX, VEX or XOP) cannot be read.
//
// NOTE(review): this listing is missing several source lines (the embedded
// numbering skips 287, 290, 293, 296, 299, 302, 326, 342, 383, 414 and 446).
// The gaps are marked below; restore them from upstream before building.
214 static int readPrefixes(struct InternalInstruction *insn) {
215  bool isPrefix = true;
216  uint8_t byte = 0;
217  uint8_t nextByte;
218
219  LLVM_DEBUG(dbgs() << "readPrefixes()");
220
// Phase 1: consume bytes one at a time for as long as each is a legacy prefix.
221  while (isPrefix) {
222  // If we fail reading prefixes, just stop here and let the opcode reader
223  // deal with it.
224  if (consume(insn, byte))
225  break;
226
227  // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
228  // break and let it be disassembled as a normal "instruction".
229  if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
230  break;
231
232  if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
233  // If the byte is 0xf2 or 0xf3, and any of the following conditions are
234  // met:
235  // - it is followed by a LOCK (0xf0) prefix
236  // - it is followed by an xchg instruction
237  // then it should be disassembled as a xacquire/xrelease not repne/rep.
238  if (((nextByte == 0xf0) ||
239  ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
240  insn->xAcquireRelease = true;
241  if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
242  break;
243  }
244  // Also if the byte is 0xf3, and the following condition is met:
245  // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
246  // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
247  // then it should be disassembled as an xrelease not rep.
248  if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
249  nextByte == 0xc6 || nextByte == 0xc7)) {
250  insn->xAcquireRelease = true;
251  break;
252  }
253  if (isREX(insn, nextByte)) {
254  uint8_t nnextByte;
255  // Go to REX prefix after the current one
256  if (consume(insn, nnextByte))
257  return -1;
258  // We should be able to read next byte after REX prefix
259  if (peek(insn, nnextByte))
260  return -1;
// Step back so the REX byte consumed just above is seen again later.
261  --insn->readerCursor;
262  }
263  }
264
265  switch (byte) {
266  case 0xf0: // LOCK
267  insn->hasLockPrefix = true;
268  break;
269  case 0xf2: // REPNE/REPNZ
270  case 0xf3: { // REP or REPE/REPZ
271  uint8_t nextByte;
272  if (peek(insn, nextByte))
273  break;
274  // TODO:
275  // 1. There could be several 0x66
276  // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
277  // it's not mandatory prefix
278  // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
279  // 0x0f exactly after it to be mandatory prefix
280  if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
281  // The last of 0xf2 /0xf3 is mandatory prefix
282  insn->mandatoryPrefix = byte;
283  insn->repeatPrefix = byte;
284  break;
285  }
// NOTE(review): each of the six segment-override cases below has lost its
// statement in this listing (source lines 287/290/293/296/299/302 are absent).
286  case 0x2e: // CS segment override -OR- Branch not taken
288  break;
289  case 0x36: // SS segment override -OR- Branch taken
291  break;
292  case 0x3e: // DS segment override
294  break;
295  case 0x26: // ES segment override
297  break;
298  case 0x64: // FS segment override
300  break;
301  case 0x65: // GS segment override
303  break;
304  case 0x66: { // Operand-size override
305  uint8_t nextByte;
306  insn->hasOpSize = true;
307  if (peek(insn, nextByte))
308  break;
309  // 0x66 can't overwrite existing mandatory prefix and should be ignored
310  if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
311  insn->mandatoryPrefix = byte;
312  break;
313  }
314  case 0x67: // Address-size override
315  insn->hasAdSize = true;
316  break;
317  default: // Not a prefix byte
318  isPrefix = false;
319  break;
320  }
321
322  if (isPrefix)
323  LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
324  }
325
// NOTE(review): source line 326 is absent from this listing.
327
// Phase 2: the last consumed byte may open an EVEX (0x62), VEX (0xc4/0xc5),
// XOP (0x8f) or REX prefix. If it does not, it is un-consumed at the end of
// this if/else chain.
328  if (byte == 0x62) {
329  uint8_t byte1, byte2;
330  if (consume(insn, byte1)) {
331  LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
332  return -1;
333  }
334
335  if (peek(insn, byte2)) {
336  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
337  return -1;
338  }
339
340  if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
341  ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
// NOTE(review): source line 342 (the body of this branch) is absent here.
343  } else {
344  --insn->readerCursor; // unconsume byte1
345  --insn->readerCursor; // unconsume byte
346  }
347
348  if (insn->vectorExtensionType == TYPE_EVEX) {
349  insn->vectorExtensionPrefix[0] = byte;
350  insn->vectorExtensionPrefix[1] = byte1;
351  if (consume(insn, insn->vectorExtensionPrefix[2])) {
352  LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
353  return -1;
354  }
355  if (consume(insn, insn->vectorExtensionPrefix[3])) {
356  LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
357  return -1;
358  }
359
360  // We simulate the REX prefix for simplicity's sake
361  if (insn->mode == MODE_64BIT) {
362  insn->rexPrefix = 0x40 |
363  (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
364  (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
365  (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
366  (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
367  }
368
369  LLVM_DEBUG(
370  dbgs() << format(
371  "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
372  insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
373  insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
374  }
375  } else if (byte == 0xc4) {
376  uint8_t byte1;
377  if (peek(insn, byte1)) {
378  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
379  return -1;
380  }
381
382  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
// NOTE(review): source line 383 (the statement for this if) is absent here.
384  else
385  --insn->readerCursor;
386
387  if (insn->vectorExtensionType == TYPE_VEX_3B) {
388  insn->vectorExtensionPrefix[0] = byte;
// The results of these two consume() calls are not checked.
389  consume(insn, insn->vectorExtensionPrefix[1]);
390  consume(insn, insn->vectorExtensionPrefix[2]);
391
392  // We simulate the REX prefix for simplicity's sake
393
394  if (insn->mode == MODE_64BIT)
395  insn->rexPrefix = 0x40 |
396  (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
397  (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
398  (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
399  (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
400
401  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
402  insn->vectorExtensionPrefix[0],
403  insn->vectorExtensionPrefix[1],
404  insn->vectorExtensionPrefix[2]));
405  }
406  } else if (byte == 0xc5) {
407  uint8_t byte1;
408  if (peek(insn, byte1)) {
409  LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
410  return -1;
411  }
412
413  if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
// NOTE(review): source line 414 (the statement for this if) is absent here.
415  else
416  --insn->readerCursor;
417
418  if (insn->vectorExtensionType == TYPE_VEX_2B) {
419  insn->vectorExtensionPrefix[0] = byte;
// The result of this consume() call is not checked.
420  consume(insn, insn->vectorExtensionPrefix[1]);
421
422  if (insn->mode == MODE_64BIT)
423  insn->rexPrefix =
424  0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
425
426  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
427  default:
428  break;
429  case VEX_PREFIX_66:
430  insn->hasOpSize = true;
431  break;
432  }
433
434  LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
435  insn->vectorExtensionPrefix[0],
436  insn->vectorExtensionPrefix[1]));
437  }
438  } else if (byte == 0x8f) {
439  uint8_t byte1;
440  if (peek(insn, byte1)) {
441  LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
442  return -1;
443  }
444
445  if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
// NOTE(review): source line 446 (the statement for this if) is absent here.
447  else
448  --insn->readerCursor;
449
450  if (insn->vectorExtensionType == TYPE_XOP) {
451  insn->vectorExtensionPrefix[0] = byte;
// The results of these two consume() calls are not checked.
452  consume(insn, insn->vectorExtensionPrefix[1]);
453  consume(insn, insn->vectorExtensionPrefix[2]);
454
455  // We simulate the REX prefix for simplicity's sake
456
457  if (insn->mode == MODE_64BIT)
458  insn->rexPrefix = 0x40 |
459  (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
460  (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
461  (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
462  (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
463
464  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
465  default:
466  break;
467  case VEX_PREFIX_66:
468  insn->hasOpSize = true;
469  break;
470  }
471
472  LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
473  insn->vectorExtensionPrefix[0],
474  insn->vectorExtensionPrefix[1],
475  insn->vectorExtensionPrefix[2]));
476  }
477  } else if (isREX(insn, byte)) {
// A REX prefix must be followed by at least one more byte.
478  if (peek(insn, nextByte))
479  return -1;
480  insn->rexPrefix = byte;
481  LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
482  } else
// Not a prefix of any kind: give the byte back to the opcode reader.
483  --insn->readerCursor;
484
// Phase 3: derive the register, address, displacement and immediate sizes
// (in bytes) from the decoding mode and the size-override flags seen above.
485  if (insn->mode == MODE_16BIT) {
486  insn->registerSize = (insn->hasOpSize ? 4 : 2);
487  insn->addressSize = (insn->hasAdSize ? 4 : 2);
488  insn->displacementSize = (insn->hasAdSize ? 4 : 2);
489  insn->immediateSize = (insn->hasOpSize ? 4 : 2);
490  } else if (insn->mode == MODE_32BIT) {
491  insn->registerSize = (insn->hasOpSize ? 2 : 4);
492  insn->addressSize = (insn->hasAdSize ? 2 : 4);
493  insn->displacementSize = (insn->hasAdSize ? 2 : 4);
494  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
495  } else if (insn->mode == MODE_64BIT) {
// With the (real or simulated) REX W bit set, the operand-size flag is
// cleared and the register size is fixed at 8.
496  if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
497  insn->registerSize = 8;
498  insn->addressSize = (insn->hasAdSize ? 4 : 8);
499  insn->displacementSize = 4;
500  insn->immediateSize = 4;
501  insn->hasOpSize = false;
502  } else {
503  insn->registerSize = (insn->hasOpSize ? 2 : 4);
504  insn->addressSize = (insn->hasAdSize ? 4 : 8);
505  insn->displacementSize = (insn->hasOpSize ? 2 : 4);
506  insn->immediateSize = (insn->hasOpSize ? 2 : 4);
507  }
508  }
509
510  return 0;
511 }
512 
513 // Consumes the SIB byte to determine addressing information.
514 static int readSIB(struct InternalInstruction *insn) {
515  SIBBase sibBaseBase = SIB_BASE_NONE;
516  uint8_t index, base;
517 
518  LLVM_DEBUG(dbgs() << "readSIB()");
519  switch (insn->addressSize) {
520  case 2:
521  default:
522  llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
523  case 4:
524  insn->sibIndexBase = SIB_INDEX_EAX;
525  sibBaseBase = SIB_BASE_EAX;
526  break;
527  case 8:
528  insn->sibIndexBase = SIB_INDEX_RAX;
529  sibBaseBase = SIB_BASE_RAX;
530  break;
531  }
532 
533  if (consume(insn, insn->sib))
534  return -1;
535 
536  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
537 
538  if (index == 0x4) {
539  insn->sibIndex = SIB_INDEX_NONE;
540  } else {
541  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
542  }
543 
544  insn->sibScale = 1 << scaleFromSIB(insn->sib);
545 
546  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
547 
548  switch (base) {
549  case 0x5:
550  case 0xd:
551  switch (modFromModRM(insn->modRM)) {
552  case 0x0:
553  insn->eaDisplacement = EA_DISP_32;
554  insn->sibBase = SIB_BASE_NONE;
555  break;
556  case 0x1:
557  insn->eaDisplacement = EA_DISP_8;
558  insn->sibBase = (SIBBase)(sibBaseBase + base);
559  break;
560  case 0x2:
561  insn->eaDisplacement = EA_DISP_32;
562  insn->sibBase = (SIBBase)(sibBaseBase + base);
563  break;
564  default:
565  llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
566  }
567  break;
568  default:
569  insn->sibBase = (SIBBase)(sibBaseBase + base);
570  break;
571  }
572 
573  return 0;
574 }
575 
576 static int readDisplacement(struct InternalInstruction *insn) {
577  int8_t d8;
578  int16_t d16;
579  int32_t d32;
580  LLVM_DEBUG(dbgs() << "readDisplacement()");
581 
582  insn->displacementOffset = insn->readerCursor - insn->startLocation;
583  switch (insn->eaDisplacement) {
584  case EA_DISP_NONE:
585  break;
586  case EA_DISP_8:
587  if (consume(insn, d8))
588  return -1;
589  insn->displacement = d8;
590  break;
591  case EA_DISP_16:
592  if (consume(insn, d16))
593  return -1;
594  insn->displacement = d16;
595  break;
596  case EA_DISP_32:
597  if (consume(insn, d32))
598  return -1;
599  insn->displacement = d32;
600  break;
601  }
602 
603  return 0;
604 }
605 
606 // Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
//
// Does nothing when the ModR/M byte has already been consumed. Otherwise it
// reads the byte, fills in insn->reg and insn->eaBase, and reads any SIB byte
// and displacement that the addressing form requires.
//
// Returns 0 on success and -1 if any required byte cannot be read.
//
// NOTE(review): this listing is missing source lines 664, 717 and 734 (see the
// markers below); restore them from upstream before building.
607 static int readModRM(struct InternalInstruction *insn) {
608  uint8_t mod, rm, reg, evexrm;
609  LLVM_DEBUG(dbgs() << "readModRM()");
610
611  if (insn->consumedModRM)
612  return 0;
613
614  if (consume(insn, insn->modRM))
615  return -1;
616  insn->consumedModRM = true;
617
618  mod = modFromModRM(insn->modRM);
619  rm = rmFromModRM(insn->modRM);
620  reg = regFromModRM(insn->modRM);
621
622  // This goes by insn->registerSize to pick the correct register, which messes
623  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
624  // fixupReg().
625  switch (insn->registerSize) {
626  case 2:
627  insn->regBase = MODRM_REG_AX;
628  insn->eaRegBase = EA_REG_AX;
629  break;
630  case 4:
631  insn->regBase = MODRM_REG_EAX;
632  insn->eaRegBase = EA_REG_EAX;
633  break;
634  case 8:
635  insn->regBase = MODRM_REG_RAX;
636  insn->eaRegBase = EA_REG_RAX;
637  break;
638  }
639
// Extend the 3-bit reg and rm fields with the REX R and B bits as bit 3.
640  reg |= rFromREX(insn->rexPrefix) << 3;
641  rm |= bFromREX(insn->rexPrefix) << 3;
642
// For EVEX in 64-bit mode, bit 4 of reg comes from the EVEX prefix. evexrm is
// only added to the register number in the mod == 3 case further down.
643  evexrm = 0;
644  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
645  reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
646  evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
647  }
648
649  insn->reg = (Reg)(insn->regBase + reg);
650
651  switch (insn->addressSize) {
652  case 2: {
653  EABase eaBaseBase = EA_BASE_BX_SI;
654
655  switch (mod) {
656  case 0x0:
// With mod == 0, rm == 6 selects a bare 16-bit displacement with no base.
657  if (rm == 0x6) {
658  insn->eaBase = EA_BASE_NONE;
659  insn->eaDisplacement = EA_DISP_16;
660  if (readDisplacement(insn))
661  return -1;
662  } else {
663  insn->eaBase = (EABase)(eaBaseBase + rm);
// NOTE(review): source line 664 is absent from this listing.
665  }
666  break;
667  case 0x1:
668  insn->eaBase = (EABase)(eaBaseBase + rm);
669  insn->eaDisplacement = EA_DISP_8;
670  insn->displacementSize = 1;
671  if (readDisplacement(insn))
672  return -1;
673  break;
674  case 0x2:
675  insn->eaBase = (EABase)(eaBaseBase + rm);
676  insn->eaDisplacement = EA_DISP_16;
677  if (readDisplacement(insn))
678  return -1;
679  break;
680  case 0x3:
// mod == 3: the operand is a register, numbered relative to eaRegBase.
681  insn->eaBase = (EABase)(insn->eaRegBase + rm);
682  if (readDisplacement(insn))
683  return -1;
684  break;
685  }
686  break;
687  }
688  case 4:
689  case 8: {
690  EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
691
692  switch (mod) {
693  case 0x0:
694  insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
695  // In determining whether RIP-relative mode is used (rm=5),
696  // or whether a SIB byte is present (rm=4),
697  // the extension bits (REX.b and EVEX.x) are ignored.
698  switch (rm & 7) {
699  case 0x4: // SIB byte is present
700  insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
701  if (readSIB(insn) || readDisplacement(insn))
702  return -1;
703  break;
704  case 0x5: // RIP-relative
705  insn->eaBase = EA_BASE_NONE;
706  insn->eaDisplacement = EA_DISP_32;
707  if (readDisplacement(insn))
708  return -1;
709  break;
710  default:
711  insn->eaBase = (EABase)(eaBaseBase + rm);
712  break;
713  }
714  break;
715  case 0x1:
716  insn->displacementSize = 1;
// NOTE(review): source line 717 is absent from this listing. As shown, this
// case has no break and runs on into case 0x2, whose first statement tests
// mod == 0x1; presumably the missing line marks that fall-through — confirm.
718  case 0x2:
719  insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
720  switch (rm & 7) {
721  case 0x4: // SIB byte is present
// NOTE(review): unlike the mod == 0 case above, EA_BASE_sib is used here
// regardless of the address size — confirm that consumers treat both alike.
722  insn->eaBase = EA_BASE_sib;
723  if (readSIB(insn) || readDisplacement(insn))
724  return -1;
725  break;
726  default:
727  insn->eaBase = (EABase)(eaBaseBase + rm);
728  if (readDisplacement(insn))
729  return -1;
730  break;
731  }
732  break;
733  case 0x3:
// NOTE(review): source line 734 is absent from this listing.
735  insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
736  break;
737  }
738  break;
739  }
740  } // switch (insn->addressSize)
741
742  return 0;
743 }
744 
// Defines a static function `name` that converts a raw register index into the
// enumerator that matches a given operand type.
//
// Macro parameters:
//   name   - Name of the generated function.
//   base   - Expression added to the index for TYPE_Rv operands.
//   prefix - Enumerator prefix; pasted onto suffixes such as _AL, _AX, _EAX,
//            _RAX, _XMM0, _K0 or _ES to form the first enumerator of a class.
//   mask   - Mask applied to the index for TYPE_R8/R16/R32/R64 before the
//            "index > 0xf" validity check.
//
// The generated function stores 1 in *valid on entry and overwrites it with 0
// when the operand type is not handled or the index is out of range for its
// register class. For an unhandled type it returns 0; for an out-of-range
// index it still returns the computed enumerator value.
//
// For TYPE_R8, indices 4..7 select the prefix##_SPL.. range instead of
// prefix##_AL + index when insn->rexPrefix is non-zero.
745 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
746  static uint16_t name(struct InternalInstruction *insn, OperandType type, \
747  uint8_t index, uint8_t *valid) { \
748  *valid = 1; \
749  switch (type) { \
750  default: \
751  debug("Unhandled register type"); \
752  *valid = 0; \
753  return 0; \
754  case TYPE_Rv: \
755  return base + index; \
756  case TYPE_R8: \
757  index &= mask; \
758  if (index > 0xf) \
759  *valid = 0; \
760  if (insn->rexPrefix && index >= 4 && index <= 7) { \
761  return prefix##_SPL + (index - 4); \
762  } else { \
763  return prefix##_AL + index; \
764  } \
765  case TYPE_R16: \
766  index &= mask; \
767  if (index > 0xf) \
768  *valid = 0; \
769  return prefix##_AX + index; \
770  case TYPE_R32: \
771  index &= mask; \
772  if (index > 0xf) \
773  *valid = 0; \
774  return prefix##_EAX + index; \
775  case TYPE_R64: \
776  index &= mask; \
777  if (index > 0xf) \
778  *valid = 0; \
779  return prefix##_RAX + index; \
780  case TYPE_ZMM: \
781  return prefix##_ZMM0 + index; \
782  case TYPE_YMM: \
783  return prefix##_YMM0 + index; \
784  case TYPE_XMM: \
785  return prefix##_XMM0 + index; \
786  case TYPE_TMM: \
787  if (index > 7) \
788  *valid = 0; \
789  return prefix##_TMM0 + index; \
790  case TYPE_VK: \
791  index &= 0xf; \
792  if (index > 7) \
793  *valid = 0; \
794  return prefix##_K0 + index; \
795  case TYPE_VK_PAIR: \
796  if (index > 7) \
797  *valid = 0; \
798  return prefix##_K0_K1 + (index / 2); \
799  case TYPE_MM64: \
800  return prefix##_MM0 + (index & 0x7); \
801  case TYPE_SEGMENTREG: \
802  if ((index & 7) > 5) \
803  *valid = 0; \
804  return prefix##_ES + (index & 7); \
805  case TYPE_DEBUGREG: \
806  return prefix##_DR0 + index; \
807  case TYPE_CONTROLREG: \
808  return prefix##_CR0 + index; \
809  case TYPE_MVSIBX: \
810  return prefix##_XMM0 + index; \
811  case TYPE_MVSIBY: \
812  return prefix##_YMM0 + index; \
813  case TYPE_MVSIBZ: \
814  return prefix##_ZMM0 + index; \
815  } \
816  }
817
818 // Consult an operand type to determine the meaning of the reg or R/M field. If
819 // the operand is an XMM operand, for example, an operand would be XMM0 instead
820 // of AX, which readModRM() would otherwise misinterpret it as.
821 //
822 // @param insn - The instruction containing the operand.
823 // @param type - The operand type.
824 // @param index - The existing value of the field as reported by readModRM().
825 // @param valid - The address of a uint8_t. The target is set to 1 if the
826 // field is valid for the register class; 0 if not.
827 // @return - The proper value.
// fixupRegValue works on MODRM_REG_* enumerators with a 5-bit index mask;
// fixupRMValue works on EA_REG_* enumerators with a 4-bit index mask.
828 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
829 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
830 
831 // Consult an operand specifier to determine which of the fixup*Value functions
832 // to use in correcting readModRM()'s interpretation.
833 //
834 // @param insn - See fixup*Value().
835 // @param op - The operand specifier.
836 // @return - 0 if fixup was successful; -1 if the register returned was
837 // invalid for its class.
//
// -1 is also returned when op->encoding is not one of the encodings handled
// by the switch below.
838 static int fixupReg(struct InternalInstruction *insn,
839  const struct OperandSpecifier *op) {
840  uint8_t valid;
841  LLVM_DEBUG(dbgs() << "fixupReg()");
842
843  switch ((OperandEncoding)op->encoding) {
844  default:
845  debug("Expected a REG or R/M encoding in fixupReg");
846  return -1;
847  case ENCODING_VVVV:
// vvvv is passed as a raw index, not relative to regBase.
848  insn->vvvv =
849  (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
850  if (!valid)
851  return -1;
852  break;
853  case ENCODING_REG:
// insn->reg was stored relative to regBase, so subtract the base to
// recover the raw index.
854  insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
855  insn->reg - insn->regBase, &valid);
856  if (!valid)
857  return -1;
858  break;
859  case ENCODING_SIB:
// NOTE(review): source line 860 is absent from this listing; presumably a
// further case label sharing this body — confirm against upstream.
// Only values at or above eaRegBase are rewritten; smaller eaBase values are
// left as they are.
861  if (insn->eaBase >= insn->eaRegBase) {
862  insn->eaBase = (EABase)fixupRMValue(
863  insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
864  if (!valid)
865  return -1;
866  }
867  break;
868  }
869
870  return 0;
871 }
872 
873 // Read the opcode (except the ModR/M byte in the case of extended or escape
874 // opcodes).
//
// Sets insn->opcodeType and insn->opcode. When a vector-extension prefix
// (EVEX, VEX or XOP) was recorded, the opcode map is chosen from a field of
// that prefix; otherwise it is chosen from the 0x0f / 0x38 / 0x3a escape bytes.
//
// Returns false on success and true on failure (a byte could not be read, or
// the map-select field of the prefix is not handled).
//
// NOTE(review): this listing is missing source lines 885, 908 and 934, which
// leaves three LLVM_DEBUG(...) calls unterminated; restore them from upstream
// before building.
875 static bool readOpcode(struct InternalInstruction *insn) {
876  uint8_t current;
877  LLVM_DEBUG(dbgs() << "readOpcode()");
878
879  insn->opcodeType = ONEBYTE;
880  if (insn->vectorExtensionType == TYPE_EVEX) {
881  switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
882  default:
883  LLVM_DEBUG(
884  dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
// NOTE(review): source line 885 (the rest of this call) is absent here.
886  return true;
887  case VEX_LOB_0F:
888  insn->opcodeType = TWOBYTE;
889  return consume(insn, insn->opcode);
890  case VEX_LOB_0F38:
891  insn->opcodeType = THREEBYTE_38;
892  return consume(insn, insn->opcode);
893  case VEX_LOB_0F3A:
894  insn->opcodeType = THREEBYTE_3A;
895  return consume(insn, insn->opcode);
896  case VEX_LOB_MAP5:
897  insn->opcodeType = MAP5;
898  return consume(insn, insn->opcode);
899  case VEX_LOB_MAP6:
900  insn->opcodeType = MAP6;
901  return consume(insn, insn->opcode);
902  }
903  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
904  switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
905  default:
906  LLVM_DEBUG(
907  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
// NOTE(review): source line 908 (the rest of this call) is absent here.
909  return true;
910  case VEX_LOB_0F:
911  insn->opcodeType = TWOBYTE;
912  return consume(insn, insn->opcode);
913  case VEX_LOB_0F38:
914  insn->opcodeType = THREEBYTE_38;
915  return consume(insn, insn->opcode);
916  case VEX_LOB_0F3A:
917  insn->opcodeType = THREEBYTE_3A;
918  return consume(insn, insn->opcode);
919  case VEX_LOB_MAP5:
920  insn->opcodeType = MAP5;
921  return consume(insn, insn->opcode);
922  case VEX_LOB_MAP6:
923  insn->opcodeType = MAP6;
924  return consume(insn, insn->opcode);
925  }
926  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
// The two-byte VEX form carries no map-select field; TWOBYTE is always used.
927  insn->opcodeType = TWOBYTE;
928  return consume(insn, insn->opcode);
929  } else if (insn->vectorExtensionType == TYPE_XOP) {
930  switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
931  default:
932  LLVM_DEBUG(
933  dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
// NOTE(review): source line 934 (the rest of this call) is absent here.
935  return true;
936  case XOP_MAP_SELECT_8:
937  insn->opcodeType = XOP8_MAP;
938  return consume(insn, insn->opcode);
939  case XOP_MAP_SELECT_9:
940  insn->opcodeType = XOP9_MAP;
941  return consume(insn, insn->opcode);
942  case XOP_MAP_SELECT_A:
943  insn->opcodeType = XOPA_MAP;
944  return consume(insn, insn->opcode);
945  }
946  }
947
// No vector-extension prefix: walk the escape bytes to find the opcode map.
948  if (consume(insn, current))
949  return true;
950
951  if (current == 0x0f) {
952  LLVM_DEBUG(
953  dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
954  if (consume(insn, current))
955  return true;
956
957  if (current == 0x38) {
958  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
959  current));
960  if (consume(insn, current))
961  return true;
962
963  insn->opcodeType = THREEBYTE_38;
964  } else if (current == 0x3a) {
965  LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
966  current));
967  if (consume(insn, current))
968  return true;
969
970  insn->opcodeType = THREEBYTE_3A;
971  } else if (current == 0x0f) {
972  LLVM_DEBUG(
973  dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
974
975  // Consume operands before the opcode to comply with the 3DNow encoding
976  if (readModRM(insn))
977  return true;
978
979  if (consume(insn, current))
980  return true;
981
982  insn->opcodeType = THREEDNOW_MAP;
983  } else {
984  LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
985  insn->opcodeType = TWOBYTE;
986  }
987  } else if (insn->mandatoryPrefix)
988  // The opcode with mandatory prefix must start with opcode escape.
989  // If not it's legacy repeat prefix
990  insn->mandatoryPrefix = 0;
991
992  // At this point we have consumed the full opcode.
993  // Anything we consume from here on must be unconsumed.
994  insn->opcode = current;
995
996  return false;
997 }
998 
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length. At each position the characters
// must either be equal, or differ in one of the ways a 16-bit name differs
// from its wider sibling:
//   'Q' / 'L' in orig  <->  'W' in equiv   (size suffix)
//   '6' / '3' in orig  <->  '1' in equiv   (first digit of 64 / 32 vs. 16)
//   '4' / '2' in orig  <->  '6' in equiv   (second digit of 64 / 32 vs. 16)
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool suffixMatch = n == 'W' && (w == 'Q' || w == 'L');
    const bool firstDigitMatch = n == '1' && (w == '6' || w == '3');
    const bool secondDigitMatch = n == '6' && (w == '4' || w == '2');
    if (!(suffixMatch || firstDigitMatch || secondDigitMatch))
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1017 
// Determine whether this instruction is a 64-bit instruction.
// The check is purely textual: the name is treated as 64-bit when it contains
// the substring "64" anywhere.
static bool is64Bit(const char *name) {
  for (const char *p = name; *p != '\0'; ++p) {
    // p[1] is at worst the terminating NUL, because *p is not NUL here.
    if (p[0] == '6' && p[1] == '4')
      return true;
  }
  return false;
}
1027 
1028 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1029 // for extended and escape opcodes, and using a supplied attribute mask.
1030 static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1031  struct InternalInstruction *insn,
1032  uint16_t attrMask) {
1033  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1034  const ContextDecision *decision;
1035  switch (insn->opcodeType) {
1036  case ONEBYTE:
1037  decision = &ONEBYTE_SYM;
1038  break;
1039  case TWOBYTE:
1040  decision = &TWOBYTE_SYM;
1041  break;
1042  case THREEBYTE_38:
1043  decision = &THREEBYTE38_SYM;
1044  break;
1045  case THREEBYTE_3A:
1046  decision = &THREEBYTE3A_SYM;
1047  break;
1048  case XOP8_MAP:
1049  decision = &XOP8_MAP_SYM;
1050  break;
1051  case XOP9_MAP:
1052  decision = &XOP9_MAP_SYM;
1053  break;
1054  case XOPA_MAP:
1055  decision = &XOPA_MAP_SYM;
1056  break;
1057  case THREEDNOW_MAP:
1058  decision = &THREEDNOW_MAP_SYM;
1059  break;
1060  case MAP5:
1061  decision = &MAP5_SYM;
1062  break;
1063  case MAP6:
1064  decision = &MAP6_SYM;
1065  break;
1066  }
1067 
1068  if (decision->opcodeDecisions[insnCtx]
1069  .modRMDecisions[insn->opcode]
1070  .modrm_type != MODRM_ONEENTRY) {
1071  if (readModRM(insn))
1072  return -1;
1073  *instructionID =
1074  decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1075  } else {
1076  *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1077  }
1078 
1079  return 0;
1080 }
1081 
1082 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1083 // for extended and escape opcodes. Determines the attributes and context for
1084 // the instruction before doing so.
1085 static int getInstructionID(struct InternalInstruction *insn,
1086  const MCInstrInfo *mii) {
1087  uint16_t attrMask;
1088  uint16_t instructionID;
1089 
1090  LLVM_DEBUG(dbgs() << "getID()");
1091 
1092  attrMask = ATTR_NONE;
1093 
1094  if (insn->mode == MODE_64BIT)
1095  attrMask |= ATTR_64BIT;
1096 
1097  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1098  attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1099 
1100  if (insn->vectorExtensionType == TYPE_EVEX) {
1101  switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1102  case VEX_PREFIX_66:
1103  attrMask |= ATTR_OPSIZE;
1104  break;
1105  case VEX_PREFIX_F3:
1106  attrMask |= ATTR_XS;
1107  break;
1108  case VEX_PREFIX_F2:
1109  attrMask |= ATTR_XD;
1110  break;
1111  }
1112 
1113  if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1114  attrMask |= ATTR_EVEXKZ;
1115  if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1116  attrMask |= ATTR_EVEXB;
1117  if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1118  attrMask |= ATTR_EVEXK;
1119  if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1120  attrMask |= ATTR_VEXL;
1121  if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1122  attrMask |= ATTR_EVEXL2;
1123  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1124  switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1125  case VEX_PREFIX_66:
1126  attrMask |= ATTR_OPSIZE;
1127  break;
1128  case VEX_PREFIX_F3:
1129  attrMask |= ATTR_XS;
1130  break;
1131  case VEX_PREFIX_F2:
1132  attrMask |= ATTR_XD;
1133  break;
1134  }
1135 
1136  if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1137  attrMask |= ATTR_VEXL;
1138  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1139  switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1140  case VEX_PREFIX_66:
1141  attrMask |= ATTR_OPSIZE;
1142  if (insn->hasAdSize)
1143  attrMask |= ATTR_ADSIZE;
1144  break;
1145  case VEX_PREFIX_F3:
1146  attrMask |= ATTR_XS;
1147  break;
1148  case VEX_PREFIX_F2:
1149  attrMask |= ATTR_XD;
1150  break;
1151  }
1152 
1153  if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1154  attrMask |= ATTR_VEXL;
1155  } else if (insn->vectorExtensionType == TYPE_XOP) {
1156  switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1157  case VEX_PREFIX_66:
1158  attrMask |= ATTR_OPSIZE;
1159  break;
1160  case VEX_PREFIX_F3:
1161  attrMask |= ATTR_XS;
1162  break;
1163  case VEX_PREFIX_F2:
1164  attrMask |= ATTR_XD;
1165  break;
1166  }
1167 
1168  if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1169  attrMask |= ATTR_VEXL;
1170  } else {
1171  return -1;
1172  }
1173  } else if (!insn->mandatoryPrefix) {
1174  // If we don't have mandatory prefix we should use legacy prefixes here
1175  if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1176  attrMask |= ATTR_OPSIZE;
1177  if (insn->hasAdSize)
1178  attrMask |= ATTR_ADSIZE;
1179  if (insn->opcodeType == ONEBYTE) {
1180  if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1181  // Special support for PAUSE
1182  attrMask |= ATTR_XS;
1183  } else {
1184  if (insn->repeatPrefix == 0xf2)
1185  attrMask |= ATTR_XD;
1186  else if (insn->repeatPrefix == 0xf3)
1187  attrMask |= ATTR_XS;
1188  }
1189  } else {
1190  switch (insn->mandatoryPrefix) {
1191  case 0xf2:
1192  attrMask |= ATTR_XD;
1193  break;
1194  case 0xf3:
1195  attrMask |= ATTR_XS;
1196  break;
1197  case 0x66:
1198  if (insn->mode != MODE_16BIT)
1199  attrMask |= ATTR_OPSIZE;
1200  if (insn->hasAdSize)
1201  attrMask |= ATTR_ADSIZE;
1202  break;
1203  case 0x67:
1204  attrMask |= ATTR_ADSIZE;
1205  break;
1206  }
1207  }
1208 
1209  if (insn->rexPrefix & 0x08) {
1210  attrMask |= ATTR_REXW;
1211  attrMask &= ~ATTR_ADSIZE;
1212  }
1213 
1214  if (insn->mode == MODE_16BIT) {
1215  // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1216  // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1217  if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1218  attrMask ^= ATTR_ADSIZE;
1219  // If we're in 16-bit mode and this is one of the relative jumps and opsize
1220  // prefix isn't present, we need to force the opsize attribute since the
1221  // prefix is inverted relative to 32-bit mode.
1222  if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1223  (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1224  attrMask |= ATTR_OPSIZE;
1225 
1226  if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1227  insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1228  attrMask |= ATTR_OPSIZE;
1229  }
1230 
1231 
1232  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1233  return -1;
1234 
1235  // The following clauses compensate for limitations of the tables.
1236 
1237  if (insn->mode != MODE_64BIT &&
1239  // The tables can't distinquish between cases where the W-bit is used to
1240  // select register size and cases where its a required part of the opcode.
1241  if ((insn->vectorExtensionType == TYPE_EVEX &&
1242  wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1243  (insn->vectorExtensionType == TYPE_VEX_3B &&
1244  wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1245  (insn->vectorExtensionType == TYPE_XOP &&
1246  wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1247 
1248  uint16_t instructionIDWithREXW;
1249  if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1250  attrMask | ATTR_REXW)) {
1251  insn->instructionID = instructionID;
1252  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1253  return 0;
1254  }
1255 
1256  auto SpecName = mii->getName(instructionIDWithREXW);
1257  // If not a 64-bit instruction. Switch the opcode.
1258  if (!is64Bit(SpecName.data())) {
1259  insn->instructionID = instructionIDWithREXW;
1260  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1261  return 0;
1262  }
1263  }
1264  }
1265 
1266  // Absolute moves, umonitor, and movdir64b need special handling.
1267  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1268  // inverted w.r.t.
1269  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1270  // any position.
1271  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1272  (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1273  (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
1274  // Make sure we observed the prefixes in any position.
1275  if (insn->hasAdSize)
1276  attrMask |= ATTR_ADSIZE;
1277  if (insn->hasOpSize)
1278  attrMask |= ATTR_OPSIZE;
1279 
1280  // In 16-bit, invert the attributes.
1281  if (insn->mode == MODE_16BIT) {
1282  attrMask ^= ATTR_ADSIZE;
1283 
1284  // The OpSize attribute is only valid with the absolute moves.
1285  if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1286  attrMask ^= ATTR_OPSIZE;
1287  }
1288 
1289  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1290  return -1;
1291 
1292  insn->instructionID = instructionID;
1293  insn->spec = &INSTRUCTIONS_SYM[instructionID];
1294  return 0;
1295  }
1296 
1297  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1298  !(attrMask & ATTR_OPSIZE)) {
1299  // The instruction tables make no distinction between instructions that
1300  // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1301  // particular spot (i.e., many MMX operations). In general we're
1302  // conservative, but in the specific case where OpSize is present but not in
1303  // the right place we check if there's a 16-bit operation.
1304  const struct InstructionSpecifier *spec;
1305  uint16_t instructionIDWithOpsize;
1306  llvm::StringRef specName, specWithOpSizeName;
1307 
1308  spec = &INSTRUCTIONS_SYM[instructionID];
1309 
1310  if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1311  attrMask | ATTR_OPSIZE)) {
1312  // ModRM required with OpSize but not present. Give up and return the
1313  // version without OpSize set.
1314  insn->instructionID = instructionID;
1315  insn->spec = spec;
1316  return 0;
1317  }
1318 
1319  specName = mii->getName(instructionID);
1320  specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1321 
1322  if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1323  (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1324  insn->instructionID = instructionIDWithOpsize;
1325  insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1326  } else {
1327  insn->instructionID = instructionID;
1328  insn->spec = spec;
1329  }
1330  return 0;
1331  }
1332 
1333  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1334  insn->rexPrefix & 0x01) {
1335  // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1336  // as XCHG %r8, %eax.
1337  const struct InstructionSpecifier *spec;
1338  uint16_t instructionIDWithNewOpcode;
1339  const struct InstructionSpecifier *specWithNewOpcode;
1340 
1341  spec = &INSTRUCTIONS_SYM[instructionID];
1342 
1343  // Borrow opcode from one of the other XCHGar opcodes
1344  insn->opcode = 0x91;
1345 
1346  if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1347  attrMask)) {
1348  insn->opcode = 0x90;
1349 
1350  insn->instructionID = instructionID;
1351  insn->spec = spec;
1352  return 0;
1353  }
1354 
1355  specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1356 
1357  // Change back
1358  insn->opcode = 0x90;
1359 
1360  insn->instructionID = instructionIDWithNewOpcode;
1361  insn->spec = specWithNewOpcode;
1362 
1363  return 0;
1364  }
1365 
1366  insn->instructionID = instructionID;
1367  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1368 
1369  return 0;
1370 }
1371 
1372 // Read an operand from the opcode field of an instruction and interprets it
1373 // appropriately given the operand width. Handles AddRegFrm instructions.
1374 //
1375 // @param insn - the instruction whose opcode field is to be read.
1376 // @param size - The width (in bytes) of the register being specified.
1377 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1378 // RAX.
1379 // @return - 0 on success; nonzero otherwise.
1380 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1381  LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1382 
1383  if (size == 0)
1384  size = insn->registerSize;
1385 
1386  switch (size) {
1387  case 1:
1388  insn->opcodeRegister = (Reg)(
1389  MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1390  if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1391  insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1392  insn->opcodeRegister =
1393  (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1394  }
1395 
1396  break;
1397  case 2:
1398  insn->opcodeRegister = (Reg)(
1399  MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1400  break;
1401  case 4:
1402  insn->opcodeRegister =
1403  (Reg)(MODRM_REG_EAX +
1404  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1405  break;
1406  case 8:
1407  insn->opcodeRegister =
1408  (Reg)(MODRM_REG_RAX +
1409  ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1410  break;
1411  }
1412 
1413  return 0;
1414 }
1415 
1416 // Consume an immediate operand from an instruction, given the desired operand
1417 // size.
1418 //
1419 // @param insn - The instruction whose operand is to be read.
1420 // @param size - The width (in bytes) of the operand.
1421 // @return - 0 if the immediate was successfully consumed; nonzero
1422 // otherwise.
1423 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1424  uint8_t imm8;
1425  uint16_t imm16;
1426  uint32_t imm32;
1427  uint64_t imm64;
1428 
1429  LLVM_DEBUG(dbgs() << "readImmediate()");
1430 
1431  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1432 
1433  insn->immediateSize = size;
1434  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1435 
1436  switch (size) {
1437  case 1:
1438  if (consume(insn, imm8))
1439  return -1;
1440  insn->immediates[insn->numImmediatesConsumed] = imm8;
1441  break;
1442  case 2:
1443  if (consume(insn, imm16))
1444  return -1;
1445  insn->immediates[insn->numImmediatesConsumed] = imm16;
1446  break;
1447  case 4:
1448  if (consume(insn, imm32))
1449  return -1;
1450  insn->immediates[insn->numImmediatesConsumed] = imm32;
1451  break;
1452  case 8:
1453  if (consume(insn, imm64))
1454  return -1;
1455  insn->immediates[insn->numImmediatesConsumed] = imm64;
1456  break;
1457  default:
1458  llvm_unreachable("invalid size");
1459  }
1460 
1461  insn->numImmediatesConsumed++;
1462 
1463  return 0;
1464 }
1465 
1466 // Consume vvvv from an instruction if it has a VEX prefix.
1467 static int readVVVV(struct InternalInstruction *insn) {
1468  LLVM_DEBUG(dbgs() << "readVVVV()");
1469 
1470  int vvvv;
1471  if (insn->vectorExtensionType == TYPE_EVEX)
1472  vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1474  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1475  vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1476  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1477  vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1478  else if (insn->vectorExtensionType == TYPE_XOP)
1479  vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1480  else
1481  return -1;
1482 
1483  if (insn->mode != MODE_64BIT)
1484  vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1485 
1486  insn->vvvv = static_cast<Reg>(vvvv);
1487  return 0;
1488 }
1489 
1490 // Read an mask register from the opcode field of an instruction.
1491 //
1492 // @param insn - The instruction whose opcode field is to be read.
1493 // @return - 0 on success; nonzero otherwise.
1494 static int readMaskRegister(struct InternalInstruction *insn) {
1495  LLVM_DEBUG(dbgs() << "readMaskRegister()");
1496 
1497  if (insn->vectorExtensionType != TYPE_EVEX)
1498  return -1;
1499 
1500  insn->writemask =
1501  static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1502  return 0;
1503 }
1504 
1505 // Consults the specifier for an instruction and consumes all
1506 // operands for that instruction, interpreting them as it goes.
1507 static int readOperands(struct InternalInstruction *insn) {
1508  int hasVVVV, needVVVV;
1509  int sawRegImm = 0;
1510 
1511  LLVM_DEBUG(dbgs() << "readOperands()");
1512 
1513  // If non-zero vvvv specified, make sure one of the operands uses it.
1514  hasVVVV = !readVVVV(insn);
1515  needVVVV = hasVVVV && (insn->vvvv != 0);
1516 
1517  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1518  switch (Op.encoding) {
1519  case ENCODING_NONE:
1520  case ENCODING_SI:
1521  case ENCODING_DI:
1522  break;
1524  // VSIB can use the V2 bit so check only the other bits.
1525  if (needVVVV)
1526  needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1527  if (readModRM(insn))
1528  return -1;
1529 
1530  // Reject if SIB wasn't used.
1531  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1532  return -1;
1533 
1534  // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1535  if (insn->sibIndex == SIB_INDEX_NONE)
1536  insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1537 
1538  // If EVEX.v2 is set this is one of the 16-31 registers.
1539  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1541  insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1542 
1543  // Adjust the index register to the correct size.
1544  switch ((OperandType)Op.type) {
1545  default:
1546  debug("Unhandled VSIB index type");
1547  return -1;
1548  case TYPE_MVSIBX:
1549  insn->sibIndex =
1550  (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1551  break;
1552  case TYPE_MVSIBY:
1553  insn->sibIndex =
1554  (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1555  break;
1556  case TYPE_MVSIBZ:
1557  insn->sibIndex =
1558  (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1559  break;
1560  }
1561 
1562  // Apply the AVX512 compressed displacement scaling factor.
1563  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1564  insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1565  break;
1566  case ENCODING_SIB:
1567  // Reject if SIB wasn't used.
1568  if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1569  return -1;
1570  if (readModRM(insn))
1571  return -1;
1572  if (fixupReg(insn, &Op))
1573  return -1;
1574  break;
1575  case ENCODING_REG:
1577  if (readModRM(insn))
1578  return -1;
1579  if (fixupReg(insn, &Op))
1580  return -1;
1581  // Apply the AVX512 compressed displacement scaling factor.
1582  if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1583  insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1584  break;
1585  case ENCODING_IB:
1586  if (sawRegImm) {
1587  // Saw a register immediate so don't read again and instead split the
1588  // previous immediate. FIXME: This is a hack.
1589  insn->immediates[insn->numImmediatesConsumed] =
1590  insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1591  ++insn->numImmediatesConsumed;
1592  break;
1593  }
1594  if (readImmediate(insn, 1))
1595  return -1;
1596  if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1597  sawRegImm = 1;
1598  break;
1599  case ENCODING_IW:
1600  if (readImmediate(insn, 2))
1601  return -1;
1602  break;
1603  case ENCODING_ID:
1604  if (readImmediate(insn, 4))
1605  return -1;
1606  break;
1607  case ENCODING_IO:
1608  if (readImmediate(insn, 8))
1609  return -1;
1610  break;
1611  case ENCODING_Iv:
1612  if (readImmediate(insn, insn->immediateSize))
1613  return -1;
1614  break;
1615  case ENCODING_Ia:
1616  if (readImmediate(insn, insn->addressSize))
1617  return -1;
1618  break;
1619  case ENCODING_IRC:
1620  insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1622  break;
1623  case ENCODING_RB:
1624  if (readOpcodeRegister(insn, 1))
1625  return -1;
1626  break;
1627  case ENCODING_RW:
1628  if (readOpcodeRegister(insn, 2))
1629  return -1;
1630  break;
1631  case ENCODING_RD:
1632  if (readOpcodeRegister(insn, 4))
1633  return -1;
1634  break;
1635  case ENCODING_RO:
1636  if (readOpcodeRegister(insn, 8))
1637  return -1;
1638  break;
1639  case ENCODING_Rv:
1640  if (readOpcodeRegister(insn, 0))
1641  return -1;
1642  break;
1643  case ENCODING_CC:
1644  insn->immediates[1] = insn->opcode & 0xf;
1645  break;
1646  case ENCODING_FP:
1647  break;
1648  case ENCODING_VVVV:
1649  needVVVV = 0; // Mark that we have found a VVVV operand.
1650  if (!hasVVVV)
1651  return -1;
1652  if (insn->mode != MODE_64BIT)
1653  insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1654  if (fixupReg(insn, &Op))
1655  return -1;
1656  break;
1657  case ENCODING_WRITEMASK:
1658  if (readMaskRegister(insn))
1659  return -1;
1660  break;
1661  case ENCODING_DUP:
1662  break;
1663  default:
1664  LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1665  return -1;
1666  }
1667  }
1668 
1669  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1670  if (needVVVV)
1671  return -1;
1672 
1673  return 0;
1674 }
1675 
namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI, // 501
    BP_SI, // 502
    BP_DI, // 503
    sib,   // 504
    sib64  // 505
  };
} // namespace X86

} // namespace llvm
1693 
1694 static bool translateInstruction(MCInst &target,
1695  InternalInstruction &source,
1696  const MCDisassembler *Dis);
1697 
1698 namespace {
1699 
1700 /// Generic disassembler for all X86 platforms. All each platform class should
1701 /// have to do is subclass the constructor, and provide a different
1702 /// disassemblerMode value.
1703 class X86GenericDisassembler : public MCDisassembler {
1704  std::unique_ptr<const MCInstrInfo> MII;
1705 public:
1706  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1707  std::unique_ptr<const MCInstrInfo> MII);
1708 public:
1709  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1710  ArrayRef<uint8_t> Bytes, uint64_t Address,
1711  raw_ostream &cStream) const override;
1712 
1713 private:
1714  DisassemblerMode fMode;
1715 };
1716 
1717 } // namespace
1718 
1719 X86GenericDisassembler::X86GenericDisassembler(
1720  const MCSubtargetInfo &STI,
1721  MCContext &Ctx,
1722  std::unique_ptr<const MCInstrInfo> MII)
1723  : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
1724  const FeatureBitset &FB = STI.getFeatureBits();
1725  if (FB[X86::Mode16Bit]) {
1726  fMode = MODE_16BIT;
1727  return;
1728  } else if (FB[X86::Mode32Bit]) {
1729  fMode = MODE_32BIT;
1730  return;
1731  } else if (FB[X86::Mode64Bit]) {
1732  fMode = MODE_64BIT;
1733  return;
1734  }
1735 
1736  llvm_unreachable("Invalid CPU mode");
1737 }
1738 
1739 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
1740  MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1741  raw_ostream &CStream) const {
1742  CommentStream = &CStream;
1743 
1745  memset(&Insn, 0, sizeof(InternalInstruction));
1746  Insn.bytes = Bytes;
1747  Insn.startLocation = Address;
1748  Insn.readerCursor = Address;
1749  Insn.mode = fMode;
1750 
1751  if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1752  getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1753  readOperands(&Insn)) {
1754  Size = Insn.readerCursor - Address;
1755  return Fail;
1756  }
1757 
1758  Insn.operands = x86OperandSets[Insn.spec->operands];
1759  Insn.length = Insn.readerCursor - Insn.startLocation;
1760  Size = Insn.length;
1761  if (Size > 15)
1762  LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1763 
1764  bool Ret = translateInstruction(Instr, Insn, this);
1765  if (!Ret) {
1766  unsigned Flags = X86::IP_NO_PREFIX;
1767  if (Insn.hasAdSize)
1768  Flags |= X86::IP_HAS_AD_SIZE;
1769  if (!Insn.mandatoryPrefix) {
1770  if (Insn.hasOpSize)
1771  Flags |= X86::IP_HAS_OP_SIZE;
1772  if (Insn.repeatPrefix == 0xf2)
1773  Flags |= X86::IP_HAS_REPEAT_NE;
1774  else if (Insn.repeatPrefix == 0xf3 &&
1775  // It should not be 'pause' f3 90
1776  Insn.opcode != 0x90)
1777  Flags |= X86::IP_HAS_REPEAT;
1778  if (Insn.hasLockPrefix)
1779  Flags |= X86::IP_HAS_LOCK;
1780  }
1781  Instr.setFlags(Flags);
1782  }
1783  return (!Ret) ? Success : Fail;
1784 }
1785 
1786 //
1787 // Private code that translates from struct InternalInstructions to MCInsts.
1788 //
1789 
1790 /// translateRegister - Translates an internal register to the appropriate LLVM
1791 /// register, and appends it as an operand to an MCInst.
1792 ///
1793 /// @param mcInst - The MCInst to append to.
1794 /// @param reg - The Reg to append.
1795 static void translateRegister(MCInst &mcInst, Reg reg) {
1796 #define ENTRY(x) X86::x,
1797  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1798 #undef ENTRY
1799 
1800  MCPhysReg llvmRegnum = llvmRegnums[reg];
1801  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1802 }
1803 
1804 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
1805 /// immediate Value in the MCInst.
1806 ///
1807 /// @param Value - The immediate Value, has had any PC adjustment made by
1808 /// the caller.
1809 /// @param isBranch - If the instruction is a branch instruction
1810 /// @param Address - The starting address of the instruction
1811 /// @param Offset - The byte offset to this immediate in the instruction
1812 /// @param Width - The byte width of this immediate in the instruction
1813 ///
1814 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
1815 /// called then that function is called to get any symbolic information for the
1816 /// immediate in the instruction using the Address, Offset and Width. If that
1817 /// returns non-zero then the symbolic information it returns is used to create
1818 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
1819 /// returns zero and isBranch is true then a symbol look up for immediate Value
1820 /// is done and if a symbol is found an MCExpr is created with that, else
1821 /// an MCExpr with the immediate Value is created. This function returns true
1822 /// if it adds an operand to the MCInst and false otherwise.
1823 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
1824  uint64_t Address, uint64_t Offset,
1825  uint64_t Width, MCInst &MI,
1826  const MCDisassembler *Dis) {
1828  Offset, Width);
1829 }
1830 
1831 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
1832 /// referenced by a load instruction with the base register that is the rip.
1833 /// These can often be addresses in a literal pool. The Address of the
1834 /// instruction and its immediate Value are used to determine the address
1835 /// being referenced in the literal pool entry. The SymbolLookUp call back will
1836 /// return a pointer to a literal 'C' string if the referenced address is an
1837 /// address into a section with 'C' string literals.
1839  const void *Decoder) {
1840  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
1842 }
1843 
1844 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1845  0, // SEG_OVERRIDE_NONE
1846  X86::CS,
1847  X86::SS,
1848  X86::DS,
1849  X86::ES,
1850  X86::FS,
1851  X86::GS
1852 };
1853 
1854 /// translateSrcIndex - Appends a source index operand to an MCInst.
1855 ///
1856 /// @param mcInst - The MCInst to append to.
1857 /// @param insn - The internal instruction.
1858 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1859  unsigned baseRegNo;
1860 
1861  if (insn.mode == MODE_64BIT)
1862  baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1863  else if (insn.mode == MODE_32BIT)
1864  baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1865  else {
1866  assert(insn.mode == MODE_16BIT);
1867  baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1868  }
1869  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1870  mcInst.addOperand(baseReg);
1871 
1872  MCOperand segmentReg;
1874  mcInst.addOperand(segmentReg);
1875  return false;
1876 }
1877 
1878 /// translateDstIndex - Appends a destination index operand to an MCInst.
1879 ///
1880 /// @param mcInst - The MCInst to append to.
1881 /// @param insn - The internal instruction.
1882 
1883 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1884  unsigned baseRegNo;
1885 
1886  if (insn.mode == MODE_64BIT)
1887  baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1888  else if (insn.mode == MODE_32BIT)
1889  baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1890  else {
1891  assert(insn.mode == MODE_16BIT);
1892  baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1893  }
1894  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1895  mcInst.addOperand(baseReg);
1896  return false;
1897 }
1898 
1899 /// translateImmediate - Appends an immediate operand to an MCInst.
1900 ///
1901 /// @param mcInst - The MCInst to append to.
1902 /// @param immediate - The immediate value to append.
1903 /// @param operand - The operand, as stored in the descriptor table.
1904 /// @param insn - The internal instruction.
1905 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1906  const OperandSpecifier &operand,
1907  InternalInstruction &insn,
1908  const MCDisassembler *Dis) {
1909  // Sign-extend the immediate if necessary.
1910 
1911  OperandType type = (OperandType)operand.type;
1912 
1913  bool isBranch = false;
1914  uint64_t pcrel = 0;
1915  if (type == TYPE_REL) {
1916  isBranch = true;
1917  pcrel = insn.startLocation +
1918  insn.immediateOffset + insn.immediateSize;
1919  switch (operand.encoding) {
1920  default:
1921  break;
1922  case ENCODING_Iv:
1923  switch (insn.displacementSize) {
1924  default:
1925  break;
1926  case 1:
1927  if(immediate & 0x80)
1928  immediate |= ~(0xffull);
1929  break;
1930  case 2:
1931  if(immediate & 0x8000)
1932  immediate |= ~(0xffffull);
1933  break;
1934  case 4:
1935  if(immediate & 0x80000000)
1936  immediate |= ~(0xffffffffull);
1937  break;
1938  case 8:
1939  break;
1940  }
1941  break;
1942  case ENCODING_IB:
1943  if(immediate & 0x80)
1944  immediate |= ~(0xffull);
1945  break;
1946  case ENCODING_IW:
1947  if(immediate & 0x8000)
1948  immediate |= ~(0xffffull);
1949  break;
1950  case ENCODING_ID:
1951  if(immediate & 0x80000000)
1952  immediate |= ~(0xffffffffull);
1953  break;
1954  }
1955  }
1956  // By default sign-extend all X86 immediates based on their encoding.
1957  else if (type == TYPE_IMM) {
1958  switch (operand.encoding) {
1959  default:
1960  break;
1961  case ENCODING_IB:
1962  if(immediate & 0x80)
1963  immediate |= ~(0xffull);
1964  break;
1965  case ENCODING_IW:
1966  if(immediate & 0x8000)
1967  immediate |= ~(0xffffull);
1968  break;
1969  case ENCODING_ID:
1970  if(immediate & 0x80000000)
1971  immediate |= ~(0xffffffffull);
1972  break;
1973  case ENCODING_IO:
1974  break;
1975  }
1976  }
1977 
1978  switch (type) {
1979  case TYPE_XMM:
1980  mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
1981  return;
1982  case TYPE_YMM:
1983  mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
1984  return;
1985  case TYPE_ZMM:
1986  mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
1987  return;
1988  default:
1989  // operand is 64 bits wide. Do nothing.
1990  break;
1991  }
1992 
1993  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
1994  insn.immediateOffset, insn.immediateSize,
1995  mcInst, Dis))
1996  mcInst.addOperand(MCOperand::createImm(immediate));
1997 
1998  if (type == TYPE_MOFFS) {
1999  MCOperand segmentReg;
2001  mcInst.addOperand(segmentReg);
2002  }
2003 }
2004 
2005 /// translateRMRegister - Translates a register stored in the R/M field of the
2006 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2007 /// @param mcInst - The MCInst to append to.
2008 /// @param insn - The internal instruction to extract the R/M field
2009 /// from.
2010 /// @return - 0 on success; -1 otherwise
2011 static bool translateRMRegister(MCInst &mcInst,
2012  InternalInstruction &insn) {
2013  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2014  debug("A R/M register operand may not have a SIB byte");
2015  return true;
2016  }
2017 
2018  switch (insn.eaBase) {
2019  default:
2020  debug("Unexpected EA base register");
2021  return true;
2022  case EA_BASE_NONE:
2023  debug("EA_BASE_NONE for ModR/M base");
2024  return true;
2025 #define ENTRY(x) case EA_BASE_##x:
2026  ALL_EA_BASES
2027 #undef ENTRY
2028  debug("A R/M register operand may not have a base; "
2029  "the operand must be a register.");
2030  return true;
2031 #define ENTRY(x) \
2032  case EA_REG_##x: \
2033  mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2034  ALL_REGS
2035 #undef ENTRY
2036  }
2037 
2038  return false;
2039 }
2040 
2041 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2042 /// fields of an internal instruction (and possibly its SIB byte) to a memory
2043 /// operand in LLVM's format, and appends it to an MCInst.
2044 ///
2045 /// @param mcInst - The MCInst to append to.
2046 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2047 /// from.
2048 /// @param ForceSIB - The instruction must use SIB.
2049 /// @return - 0 on success; nonzero otherwise
2050 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2051  const MCDisassembler *Dis,
2052  bool ForceSIB = false) {
2053  // Addresses in an MCInst are represented as five operands:
2054  // 1. basereg (register) The R/M base, or (if there is a SIB) the
2055  // SIB base
2056  // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2057  // scale amount
2058  // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2059  // the index (which is multiplied by the
2060  // scale amount)
2061  // 4. displacement (immediate) 0, or the displacement if there is one
2062  // 5. segmentreg (register) x86_registerNONE for now, but could be set
2063  // if we have segment overrides
2064 
2065  MCOperand baseReg;
2066  MCOperand scaleAmount;
2067  MCOperand indexReg;
2068  MCOperand displacement;
2069  MCOperand segmentReg;
2070  uint64_t pcrel = 0;
2071 
2072  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2073  if (insn.sibBase != SIB_BASE_NONE) {
2074  switch (insn.sibBase) {
2075  default:
2076  debug("Unexpected sibBase");
2077  return true;
2078 #define ENTRY(x) \
2079  case SIB_BASE_##x: \
2080  baseReg = MCOperand::createReg(X86::x); break;
2082 #undef ENTRY
2083  }
2084  } else {
2085  baseReg = MCOperand::createReg(X86::NoRegister);
2086  }
2087 
2088  if (insn.sibIndex != SIB_INDEX_NONE) {
2089  switch (insn.sibIndex) {
2090  default:
2091  debug("Unexpected sibIndex");
2092  return true;
2093 #define ENTRY(x) \
2094  case SIB_INDEX_##x: \
2095  indexReg = MCOperand::createReg(X86::x); break;
2098  REGS_XMM
2099  REGS_YMM
2100  REGS_ZMM
2101 #undef ENTRY
2102  }
2103  } else {
2104  // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2105  // but no index is used and modrm alone should have been enough.
2106  // -No base register in 32-bit mode. In 64-bit mode this is used to
2107  // avoid rip-relative addressing.
2108  // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2109  // base always requires a SIB byte.
2110  // -A scale other than 1 is used.
2111  if (!ForceSIB &&
2112  (insn.sibScale != 1 ||
2113  (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2114  (insn.sibBase != SIB_BASE_NONE &&
2115  insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2116  insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2117  indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2118  X86::RIZ);
2119  } else
2120  indexReg = MCOperand::createReg(X86::NoRegister);
2121  }
2122 
2123  scaleAmount = MCOperand::createImm(insn.sibScale);
2124  } else {
2125  switch (insn.eaBase) {
2126  case EA_BASE_NONE:
2127  if (insn.eaDisplacement == EA_DISP_NONE) {
2128  debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2129  return true;
2130  }
2131  if (insn.mode == MODE_64BIT){
2132  pcrel = insn.startLocation +
2135  insn.displacementOffset,
2136  insn.displacement + pcrel, Dis);
2137  // Section 2.2.1.6
2138  baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2139  X86::RIP);
2140  }
2141  else
2142  baseReg = MCOperand::createReg(X86::NoRegister);
2143 
2144  indexReg = MCOperand::createReg(X86::NoRegister);
2145  break;
2146  case EA_BASE_BX_SI:
2147  baseReg = MCOperand::createReg(X86::BX);
2148  indexReg = MCOperand::createReg(X86::SI);
2149  break;
2150  case EA_BASE_BX_DI:
2151  baseReg = MCOperand::createReg(X86::BX);
2152  indexReg = MCOperand::createReg(X86::DI);
2153  break;
2154  case EA_BASE_BP_SI:
2155  baseReg = MCOperand::createReg(X86::BP);
2156  indexReg = MCOperand::createReg(X86::SI);
2157  break;
2158  case EA_BASE_BP_DI:
2159  baseReg = MCOperand::createReg(X86::BP);
2160  indexReg = MCOperand::createReg(X86::DI);
2161  break;
2162  default:
2163  indexReg = MCOperand::createReg(X86::NoRegister);
2164  switch (insn.eaBase) {
2165  default:
2166  debug("Unexpected eaBase");
2167  return true;
2168  // Here, we will use the fill-ins defined above. However,
2169  // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2170  // sib and sib64 were handled in the top-level if, so they're only
2171  // placeholders to keep the compiler happy.
2172 #define ENTRY(x) \
2173  case EA_BASE_##x: \
2174  baseReg = MCOperand::createReg(X86::x); break;
2175  ALL_EA_BASES
2176 #undef ENTRY
2177 #define ENTRY(x) case EA_REG_##x:
2178  ALL_REGS
2179 #undef ENTRY
2180  debug("A R/M memory operand may not be a register; "
2181  "the base field must be a base.");
2182  return true;
2183  }
2184  }
2185 
2186  scaleAmount = MCOperand::createImm(1);
2187  }
2188 
2189  displacement = MCOperand::createImm(insn.displacement);
2190 
2192 
2193  mcInst.addOperand(baseReg);
2194  mcInst.addOperand(scaleAmount);
2195  mcInst.addOperand(indexReg);
2196  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
2197  insn.startLocation, insn.displacementOffset,
2198  insn.displacementSize, mcInst, Dis))
2199  mcInst.addOperand(displacement);
2200  mcInst.addOperand(segmentReg);
2201  return false;
2202 }
2203 
2204 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2205 /// byte of an instruction to LLVM form, and appends it to an MCInst.
2206 ///
2207 /// @param mcInst - The MCInst to append to.
2208 /// @param operand - The operand, as stored in the descriptor table.
2209 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
2210 /// from.
2211 /// @return - 0 on success; nonzero otherwise
2212 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2213  InternalInstruction &insn, const MCDisassembler *Dis) {
2214  switch (operand.type) {
2215  default:
2216  debug("Unexpected type for a R/M operand");
2217  return true;
2218  case TYPE_R8:
2219  case TYPE_R16:
2220  case TYPE_R32:
2221  case TYPE_R64:
2222  case TYPE_Rv:
2223  case TYPE_MM64:
2224  case TYPE_XMM:
2225  case TYPE_YMM:
2226  case TYPE_ZMM:
2227  case TYPE_TMM:
2228  case TYPE_VK_PAIR:
2229  case TYPE_VK:
2230  case TYPE_DEBUGREG:
2231  case TYPE_CONTROLREG:
2232  case TYPE_BNDR:
2233  return translateRMRegister(mcInst, insn);
2234  case TYPE_M:
2235  case TYPE_MVSIBX:
2236  case TYPE_MVSIBY:
2237  case TYPE_MVSIBZ:
2238  return translateRMMemory(mcInst, insn, Dis);
2239  case TYPE_MSIB:
2240  return translateRMMemory(mcInst, insn, Dis, true);
2241  }
2242 }
2243 
2244 /// translateFPRegister - Translates a stack position on the FPU stack to its
2245 /// LLVM form, and appends it to an MCInst.
2246 ///
2247 /// @param mcInst - The MCInst to append to.
2248 /// @param stackPos - The stack position to translate.
2249 static void translateFPRegister(MCInst &mcInst,
2250  uint8_t stackPos) {
2251  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2252 }
2253 
2254 /// translateMaskRegister - Translates a 3-bit mask register number to
2255 /// LLVM form, and appends it to an MCInst.
2256 ///
2257 /// @param mcInst - The MCInst to append to.
2258 /// @param maskRegNum - Number of mask register from 0 to 7.
2259 /// @return - false on success; true otherwise.
2260 static bool translateMaskRegister(MCInst &mcInst,
2261  uint8_t maskRegNum) {
2262  if (maskRegNum >= 8) {
2263  debug("Invalid mask register number");
2264  return true;
2265  }
2266 
2267  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2268  return false;
2269 }
2270 
2271 /// translateOperand - Translates an operand stored in an internal instruction
2272 /// to LLVM's format and appends it to an MCInst.
2273 ///
2274 /// @param mcInst - The MCInst to append to.
2275 /// @param operand - The operand, as stored in the descriptor table.
2276 /// @param insn - The internal instruction.
2277 /// @return - false on success; true otherwise.
2278 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2279  InternalInstruction &insn,
2280  const MCDisassembler *Dis) {
2281  switch (operand.encoding) {
2282  default:
2283  debug("Unhandled operand encoding during translation");
2284  return true;
2285  case ENCODING_REG:
2286  translateRegister(mcInst, insn.reg);
2287  return false;
2288  case ENCODING_WRITEMASK:
2289  return translateMaskRegister(mcInst, insn.writemask);
2290  case ENCODING_SIB:
2293  return translateRM(mcInst, operand, insn, Dis);
2294  case ENCODING_IB:
2295  case ENCODING_IW:
2296  case ENCODING_ID:
2297  case ENCODING_IO:
2298  case ENCODING_Iv:
2299  case ENCODING_Ia:
2300  translateImmediate(mcInst,
2301  insn.immediates[insn.numImmediatesTranslated++],
2302  operand,
2303  insn,
2304  Dis);
2305  return false;
2306  case ENCODING_IRC:
2307  mcInst.addOperand(MCOperand::createImm(insn.RC));
2308  return false;
2309  case ENCODING_SI:
2310  return translateSrcIndex(mcInst, insn);
2311  case ENCODING_DI:
2312  return translateDstIndex(mcInst, insn);
2313  case ENCODING_RB:
2314  case ENCODING_RW:
2315  case ENCODING_RD:
2316  case ENCODING_RO:
2317  case ENCODING_Rv:
2318  translateRegister(mcInst, insn.opcodeRegister);
2319  return false;
2320  case ENCODING_CC:
2321  mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2322  return false;
2323  case ENCODING_FP:
2324  translateFPRegister(mcInst, insn.modRM & 7);
2325  return false;
2326  case ENCODING_VVVV:
2327  translateRegister(mcInst, insn.vvvv);
2328  return false;
2329  case ENCODING_DUP:
2330  return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2331  insn, Dis);
2332  }
2333 }
2334 
2335 /// translateInstruction - Translates an internal instruction and all its
2336 /// operands to an MCInst.
2337 ///
2338 /// @param mcInst - The MCInst to populate with the instruction's data.
2339 /// @param insn - The internal instruction.
2340 /// @return - false on success; true otherwise.
2341 static bool translateInstruction(MCInst &mcInst,
2342  InternalInstruction &insn,
2343  const MCDisassembler *Dis) {
2344  if (!insn.spec) {
2345  debug("Instruction has no specification");
2346  return true;
2347  }
2348 
2349  mcInst.clear();
2350  mcInst.setOpcode(insn.instructionID);
2351  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2352  // prefix bytes should be disassembled as xrelease and xacquire then set the
2353  // opcode to those instead of the rep and repne opcodes.
2354  if (insn.xAcquireRelease) {
2355  if(mcInst.getOpcode() == X86::REP_PREFIX)
2356  mcInst.setOpcode(X86::XRELEASE_PREFIX);
2357  else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2358  mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2359  }
2360 
2361  insn.numImmediatesTranslated = 0;
2362 
2363  for (const auto &Op : insn.operands) {
2364  if (Op.encoding != ENCODING_NONE) {
2365  if (translateOperand(mcInst, Op, insn, Dis)) {
2366  return true;
2367  }
2368  }
2369  }
2370 
2371  return false;
2372 }
2373 
2375  const MCSubtargetInfo &STI,
2376  MCContext &Ctx) {
2377  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
2378  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2379 }
2380 
2382  // Register the disassembler.
2387 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::X86Disassembler::MODE_16BIT
@ MODE_16BIT
Definition: X86DisassemblerDecoderCommon.h:470
i
i
Definition: README.txt:29
byte
SSE Variable shift can be custom lowered to something like which uses a small table unaligned load shuffle instead of going through memory byte
Definition: README-SSE.txt:11
llvm::X86Disassembler::InternalInstruction::xAcquireRelease
bool xAcquireRelease
Definition: X86DisassemblerDecoder.h:547
vvvvFromVEX3of3
#define vvvvFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:56
llvm::X86Disassembler::InternalInstruction::vectorExtensionType
VectorExtensionType vectorExtensionType
Definition: X86DisassemblerDecoder.h:541
bFromXOP2of3
#define bFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:67
baseFromSIB
#define baseFromSIB(sib)
Definition: X86DisassemblerDecoder.h:30
llvm::X86Disassembler::MODE_64BIT
@ MODE_64BIT
Definition: X86DisassemblerDecoderCommon.h:472
llvm::X86Disassembler::InternalInstruction::hasLockPrefix
bool hasLockPrefix
Definition: X86DisassemblerDecoder.h:554
llvm::X86Disassembler::InternalInstruction::displacement
int32_t displacement
Definition: X86DisassemblerDecoder.h:603
xFromXOP2of3
#define xFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:66
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
THREEDNOW_MAP_SYM
#define THREEDNOW_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:33
lFromVEX2of2
#define lFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:62
llvm::X86Disassembler::TYPE_NO_VEX_XOP
@ TYPE_NO_VEX_XOP
Definition: X86DisassemblerDecoder.h:505
wFromVEX3of3
#define wFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:55
llvm::X86::BX_DI
@ BX_DI
Definition: X86Disassembler.cpp:1684
rFromEVEX2of4
#define rFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:36
type
llvm::X86Disassembler::VEX_LOB_0F3A
@ VEX_LOB_0F3A
Definition: X86DisassemblerDecoder.h:485
llvm::X86Disassembler::SEG_OVERRIDE_GS
@ SEG_OVERRIDE_GS
Definition: X86DisassemblerDecoder.h:477
OpcodeDecision
Definition: X86Disassembler.cpp:108
OpcodeDecision::modRMDecisions
ModRMDecision modRMDecisions[256]
Definition: X86Disassembler.cpp:109
ppFromXOP3of3
#define ppFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:72
MCDisassembler.h
debug
#define debug(s)
Definition: X86Disassembler.cpp:96
T
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:72
llvm::X86Disassembler::ATTR_REXW
@ ATTR_REXW
Definition: X86DisassemblerDecoderCommon.h:58
llvm::X86::IP_HAS_OP_SIZE
@ IP_HAS_OP_SIZE
Definition: X86BaseInfo.h:58
llvm::getTheX86_64Target
Target & getTheX86_64Target()
Definition: X86TargetInfo.cpp:17
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1019
CASE_ENCODING_RM
#define CASE_ENCODING_RM
Definition: X86DisassemblerDecoderCommon.h:343
llvm::X86Disassembler::TYPE_EVEX
@ TYPE_EVEX
Definition: X86DisassemblerDecoder.h:508
llvm::X86Disassembler::InstructionSpecifier::operands
uint16_t operands
Definition: X86DisassemblerDecoder.h:515
llvm::X86Disassembler::SEG_OVERRIDE_SS
@ SEG_OVERRIDE_SS
Definition: X86DisassemblerDecoder.h:473
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
op
#define op(i)
translateImmediate
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
Definition: X86Disassembler.cpp:1905
isREX
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Definition: X86Disassembler.cpp:205
llvm::N86::ESI
@ ESI
Definition: X86MCTargetDesc.h:51
mmmmmFromVEX2of3
#define mmmmmFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:54
ModRMDecision::instructionIDs
uint16_t instructionIDs
Definition: X86Disassembler.cpp:103
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
ppFromVEX2of2
#define ppFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:63
llvm::X86Disassembler::VEX_LOB_0F38
@ VEX_LOB_0F38
Definition: X86DisassemblerDecoder.h:484
Fail
#define Fail
Definition: AArch64Disassembler.cpp:267
llvm::X86Disassembler::InternalInstruction::consumedModRM
bool consumedModRM
Definition: X86DisassemblerDecoder.h:596
llvm::X86Disassembler::InternalInstruction::segmentOverride
SegmentOverride segmentOverride
Definition: X86DisassemblerDecoder.h:545
llvm::X86Disassembler::SEG_OVERRIDE_FS
@ SEG_OVERRIDE_FS
Definition: X86DisassemblerDecoder.h:476
modFromModRM
#define modFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:25
scaleFromSIB
#define scaleFromSIB(sib)
Definition: X86DisassemblerDecoder.h:28
llvm::X86::IP_HAS_LOCK
@ IP_HAS_LOCK
Definition: X86BaseInfo.h:62
llvm::X86Disassembler::InternalInstruction::opcodeType
OpcodeType opcodeType
Definition: X86DisassemblerDecoder.h:577
readModRM
static int readModRM(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:607
translateFPRegister
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
Definition: X86Disassembler.cpp:2249
nextByte
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
Definition: WebAssemblyDisassembler.cpp:76
llvm::X86AS::FS
@ FS
Definition: X86.h:188
createX86Disassembler
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Definition: X86Disassembler.cpp:2374
llvm::X86Disassembler::EA_BASE_NONE
@ EA_BASE_NONE
Definition: X86DisassemblerDecoder.h:419
llvm::X86Disassembler::TYPE_XOP
@ TYPE_XOP
Definition: X86DisassemblerDecoder.h:509
llvm::X86Disassembler::InternalInstruction::sibIndex
SIBIndex sibIndex
Definition: X86DisassemblerDecoder.h:629
llvm::X86Disassembler::XOP_MAP_SELECT_8
@ XOP_MAP_SELECT_8
Definition: X86DisassemblerDecoder.h:491
llvm::TargetRegistry::RegisterMCDisassembler
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
Definition: TargetRegistry.h:916
rFromXOP2of3
#define rFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:65
T
#define T
Definition: Mips16ISelLowering.cpp:341
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
llvm::FeatureBitset
Container class for subtarget features.
Definition: SubtargetFeature.h:40
llvm::X86Disassembler::MODE_32BIT
@ MODE_32BIT
Definition: X86DisassemblerDecoderCommon.h:471
X86DisassemblerDecoder.h
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
rFromVEX2of3
#define rFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:51
MAP6_SYM
#define MAP6_SYM
Definition: X86DisassemblerDecoderCommon.h:35
llvm::SIInstrFlags::DS
@ DS
Definition: SIDefines.h:60
ModRMDecision
Definition: X86Disassembler.cpp:101
translateSrcIndex
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Definition: X86Disassembler.cpp:1858
vvvvFromVEX2of2
#define vvvvFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:61
mmmFromEVEX2of4
#define mmmFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:40
llvm::X86::sib
@ sib
Definition: X86Disassembler.cpp:1687
llvm::X86Disassembler::ATTR_EVEX
@ ATTR_EVEX
Definition: X86DisassemblerDecoderCommon.h:63
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
llvm::X86Disassembler::ATTR_EVEXK
@ ATTR_EVEXK
Definition: X86DisassemblerDecoderCommon.h:65
MAP5_SYM
#define MAP5_SYM
Definition: X86DisassemblerDecoderCommon.h:34
fixupReg
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Definition: X86Disassembler.cpp:838
Format.h
ALL_SIB_BASES
#define ALL_SIB_BASES
Definition: X86DisassemblerDecoder.h:393
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
rFromVEX2of2
#define rFromVEX2of2(vex)
Definition: X86DisassemblerDecoder.h:60
llvm::X86Disassembler::InternalInstruction::opcode
uint8_t opcode
Definition: X86DisassemblerDecoder.h:572
llvm::X86Disassembler::InternalInstruction::numImmediatesTranslated
uint8_t numImmediatesTranslated
Definition: X86DisassemblerDecoder.h:607
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
translateRMRegister
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
Definition: X86Disassembler.cpp:2011
peek
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Definition: X86Disassembler.cpp:184
llvm::X86Disassembler::ATTR_XS
@ ATTR_XS
Definition: X86DisassemblerDecoderCommon.h:56
bFromREX
#define bFromREX(rex)
Definition: X86DisassemblerDecoder.h:34
getInstructionIDWithAttrMask
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
Definition: X86Disassembler.cpp:1030
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
llvm::X86Disassembler::InternalInstruction::opcodeRegister
Reg opcodeRegister
Definition: X86DisassemblerDecoder.h:611
llvm::X86Disassembler::InstructionContext
InstructionContext
Definition: X86DisassemblerDecoderCommon.h:283
llvm::X86Disassembler::EA_DISP_32
@ EA_DISP_32
Definition: X86DisassemblerDecoder.h:458
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
xFromREX
#define xFromREX(rex)
Definition: X86DisassemblerDecoder.h:33
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
EA_BASES_64BIT
#define EA_BASES_64BIT
Definition: X86DisassemblerDecoder.h:169
llvm::X86Disassembler::SEG_OVERRIDE_CS
@ SEG_OVERRIDE_CS
Definition: X86DisassemblerDecoder.h:472
r2FromEVEX2of4
#define r2FromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:39
llvm::X86Disassembler::ATTR_EVEXL2
@ ATTR_EVEXL2
Definition: X86DisassemblerDecoderCommon.h:64
readSIB
static int readSIB(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:514
llvm::X86Disassembler::SIB_BASE_NONE
@ SIB_BASE_NONE
Definition: X86DisassemblerDecoder.h:446
llvm::X86Disassembler::MAP5
@ MAP5
Definition: X86DisassemblerDecoderCommon.h:300
llvm::X86Disassembler::XOP_MAP_SELECT_A
@ XOP_MAP_SELECT_A
Definition: X86DisassemblerDecoder.h:493
lFromVEX3of3
#define lFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:57
llvm::X86Disassembler::THREEBYTE_38
@ THREEBYTE_38
Definition: X86DisassemblerDecoderCommon.h:294
llvm::X86Disassembler::VEX_LOB_0F
@ VEX_LOB_0F
Definition: X86DisassemblerDecoder.h:483
wFromEVEX3of4
#define wFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:41
llvm::X86Disassembler::InternalInstruction
The x86 internal instruction, which is produced by the decoder.
Definition: X86DisassemblerDecoder.h:519
llvm::X86Disassembler::SEG_OVERRIDE_ES
@ SEG_OVERRIDE_ES
Definition: X86DisassemblerDecoder.h:475
llvm::X86::IP_HAS_AD_SIZE
@ IP_HAS_AD_SIZE
Definition: X86BaseInfo.h:59
llvm::X86Disassembler::InternalInstruction::rexPrefix
uint8_t rexPrefix
Definition: X86DisassemblerDecoder.h:543
llvm::X86Disassembler::EA_DISP_16
@ EA_DISP_16
Definition: X86DisassemblerDecoder.h:457
llvm::X86Disassembler::SIB_INDEX_NONE
@ SIB_INDEX_NONE
Definition: X86DisassemblerDecoder.h:434
MCContext.h
MCInstrInfo.h
llvm::X86Disassembler::InternalInstruction::hasAdSize
bool hasAdSize
Definition: X86DisassemblerDecoder.h:550
vvvvFromEVEX3of4
#define vvvvFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:42
XOP8_MAP_SYM
#define XOP8_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:30
llvm::X86Disassembler::InternalInstruction::vvvv
Reg vvvv
Definition: X86DisassemblerDecoder.h:589
MCInst.h
readImmediate
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1423
EA_BASES_32BIT
#define EA_BASES_32BIT
Definition: X86DisassemblerDecoder.h:133
MCSubtargetInfo.h
llvm::MCSubtargetInfo::getFeatureBits
const FeatureBitset & getFeatureBits() const
Definition: MCSubtargetInfo.h:111
readOperands
static int readOperands(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1507
llvm::X86Disassembler::VEX_PREFIX_F2
@ VEX_PREFIX_F2
Definition: X86DisassemblerDecoder.h:501
llvm::X86Disassembler::InternalInstruction::startLocation
uint64_t startLocation
Definition: X86DisassemblerDecoder.h:530
ppFromVEX3of3
#define ppFromVEX3of3(vex)
Definition: X86DisassemblerDecoder.h:58
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::MCInst::setFlags
void setFlags(unsigned F)
Definition: MCInst.h:200
llvm::X86Disassembler::XOPA_MAP
@ XOPA_MAP
Definition: X86DisassemblerDecoderCommon.h:298
llvm::X86Disassembler::SIBIndex
SIBIndex
All possible values of the SIB index field.
Definition: X86DisassemblerDecoder.h:433
llvm::X86Disassembler::OperandEncoding
OperandEncoding
Definition: X86DisassemblerDecoderCommon.h:406
llvm::X86Disassembler::EA_DISP_NONE
@ EA_DISP_NONE
Definition: X86DisassemblerDecoder.h:455
translateOperand
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
Definition: X86Disassembler.cpp:2278
bFromVEX2of3
#define bFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:53
wFromREX
#define wFromREX(rex)
Definition: X86DisassemblerDecoder.h:31
llvm::X86Disassembler::ATTR_VEX
@ ATTR_VEX
Definition: X86DisassemblerDecoderCommon.h:61
llvm::MCDisassembler::DecodeStatus
DecodeStatus
Ternary decode status.
Definition: MCDisassembler.h:100
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
ONEBYTE_SYM
#define ONEBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:26
llvm::X86Disassembler::InternalInstruction::displacementSize
uint8_t displacementSize
Definition: X86DisassemblerDecoder.h:561
llvm::X86Disassembler::IC_max
@ IC_max
Definition: X86DisassemblerDecoderCommon.h:285
llvm::X86Disassembler::SIBBase
SIBBase
All possible values of the SIB base field.
Definition: X86DisassemblerDecoder.h:445
GENERIC_FIXUP_FUNC
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
Definition: X86Disassembler.cpp:745
readMaskRegister
static int readMaskRegister(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1494
getInstructionID
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
Definition: X86Disassembler.cpp:1085
llvm::X86Disassembler::InternalInstruction::modRM
uint8_t modRM
Definition: X86DisassemblerDecoder.h:597
llvm::X86Disassembler::InternalInstruction::eaRegBase
EABase eaRegBase
Definition: X86DisassemblerDecoder.h:617
llvm::X86Disassembler::TYPE_VEX_2B
@ TYPE_VEX_2B
Definition: X86DisassemblerDecoder.h:506
llvm::getTheX86_32Target
Target & getTheX86_32Target()
Definition: X86TargetInfo.cpp:13
lFromXOP3of3
#define lFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:71
regFromModRM
#define regFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:26
LLVMInitializeX86Disassembler
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
Definition: X86Disassembler.cpp:2381
X86MCTargetDesc.h
llvm::X86Disassembler::ATTR_OPSIZE
@ ATTR_OPSIZE
Definition: X86DisassemblerDecoderCommon.h:59
llvm::X86Disassembler::InternalInstruction::RC
uint8_t RC
Definition: X86DisassemblerDecoder.h:634
llvm::X86Disassembler::VEX_PREFIX_F3
@ VEX_PREFIX_F3
Definition: X86DisassemblerDecoder.h:500
llvm::X86Disassembler::InternalInstruction::bytes
llvm::ArrayRef< uint8_t > bytes
Definition: X86DisassemblerDecoder.h:521
llvm::X86::IP_HAS_REPEAT
@ IP_HAS_REPEAT
Definition: X86BaseInfo.h:61
index
splat index
Definition: README_ALTIVEC.txt:181
uint64_t
llvm::X86Disassembler::ATTR_EVEXB
@ ATTR_EVEXB
Definition: X86DisassemblerDecoderCommon.h:67
llvm::X86Disassembler::VEX_LOB_MAP6
@ VEX_LOB_MAP6
Definition: X86DisassemblerDecoder.h:487
llvm::X86Disassembler::ATTR_VEXL
@ ATTR_VEXL
Definition: X86DisassemblerDecoderCommon.h:62
tryAddingSymbolicOperand
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, const MCDisassembler *Dis)
tryAddingSymbolicOperand - trys to add a symbolic operand in place of the immediate Value in the MCIn...
Definition: X86Disassembler.cpp:1823
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:136
llvm::X86::BP_DI
@ BP_DI
Definition: X86Disassembler.cpp:1686
llvm::X86Disassembler::XOP8_MAP
@ XOP8_MAP
Definition: X86DisassemblerDecoderCommon.h:296
llvm::MCDisassembler::tryAddingPcLoadReferenceComment
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
Definition: MCDisassembler.cpp:36
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::MCDisassembler
Superclass for all disassemblers.
Definition: MCDisassembler.h:76
ContextDecision
Definition: X86Disassembler.cpp:117
llvm::X86Disassembler::InternalInstruction::writemask
Reg writemask
Definition: X86DisassemblerDecoder.h:592
vvvvFromXOP3of3
#define vvvvFromXOP3of3(vex)
Definition: X86DisassemblerDecoder.h:70
decode
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
Definition: X86Disassembler.cpp:123
llvm::X86Disassembler::XOP_MAP_SELECT_9
@ XOP_MAP_SELECT_9
Definition: X86DisassemblerDecoder.h:492
llvm::X86Disassembler::InternalInstruction::immediates
uint64_t immediates[2]
Definition: X86DisassemblerDecoder.h:608
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
llvm::X86Disassembler::InternalInstruction::eaBase
EABase eaBase
Definition: X86DisassemblerDecoder.h:622
mmmmmFromXOP2of3
#define mmmmmFromXOP2of3(xop)
Definition: X86DisassemblerDecoder.h:68
llvm::X86Disassembler::EABase
EABase
All possible values of the base field for effective-address computations, a.k.a.
Definition: X86DisassemblerDecoder.h:418
llvm::MCOI::OperandType
OperandType
Operands are tagged with one of the values of this enum.
Definition: MCInstrDesc.h:56
llvm::X86Disassembler::InternalInstruction::reg
Reg reg
Definition: X86DisassemblerDecoder.h:625
llvm::X86Disassembler::InternalInstruction::sib
uint8_t sib
Definition: X86DisassemblerDecoder.h:600
llvm::X86Disassembler::InternalInstruction::immediateOffset
uint8_t immediateOffset
Definition: X86DisassemblerDecoder.h:567
llvm::X86Disassembler::ONEBYTE
@ ONEBYTE
Definition: X86DisassemblerDecoderCommon.h:292
llvm::X86::IP_NO_PREFIX
@ IP_NO_PREFIX
Definition: X86BaseInfo.h:57
llvm::HighlightColor::Address
@ Address
ppFromEVEX3of4
#define ppFromEVEX3of4(evex)
Definition: X86DisassemblerDecoder.h:43
rmFromModRM
#define rmFromModRM(modRM)
Definition: X86DisassemblerDecoder.h:27
llvm::X86Disassembler::InternalInstruction::spec
const InstructionSpecifier * spec
Definition: X86DisassemblerDecoder.h:581
llvm::X86Disassembler::ATTR_ADSIZE
@ ATTR_ADSIZE
Definition: X86DisassemblerDecoderCommon.h:60
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
translateInstruction
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
Definition: X86Disassembler.cpp:2341
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1707
SI
StandardInstrumentations SI(Debug, VerifyEach)
base
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
Definition: README.txt:125
llvm::X86Disassembler::InternalInstruction::instructionID
uint16_t instructionID
Definition: X86DisassemblerDecoder.h:579
llvm::X86Disassembler::ATTR_EVEXKZ
@ ATTR_EVEXKZ
Definition: X86DisassemblerDecoderCommon.h:66
isBranch
static bool isBranch(unsigned Opcode)
Definition: R600InstrInfo.cpp:641
llvm::X86Disassembler::OperandSpecifier::encoding
uint8_t encoding
Definition: X86DisassemblerDecoderCommon.h:460
llvm::MCDisassembler::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) const
Definition: MCDisassembler.cpp:26
llvm::MCInstrInfo::getName
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:68
llvm::X86Disassembler::InternalInstruction::addressSize
uint8_t addressSize
Definition: X86DisassemblerDecoder.h:560
llvm::X86Disassembler::InternalInstruction::numImmediatesConsumed
uint8_t numImmediatesConsumed
Definition: X86DisassemblerDecoder.h:606
zFromEVEX4of4
#define zFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:44
llvm::X86Disassembler::InternalInstruction::operands
ArrayRef< OperandSpecifier > operands
Definition: X86DisassemblerDecoder.h:636
llvm::X86Disassembler::XOP9_MAP
@ XOP9_MAP
Definition: X86DisassemblerDecoderCommon.h:297
llvm::X86Disassembler::InternalInstruction::displacementOffset
uint8_t displacementOffset
Definition: X86DisassemblerDecoder.h:566
llvm::X86Disassembler::OpcodeType
OpcodeType
Definition: X86DisassemblerDecoderCommon.h:291
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1630
llvm::HexagonMCInstrInfo::isPrefix
bool isPrefix(MCInstrInfo const &MCII, MCInst const &MCI)
Definition: HexagonMCInstrInfo.cpp:731
llvm::ArrayRef< uint8_t >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
bFromEVEX2of4
#define bFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:38
llvm::X86Disassembler::InternalInstruction::regBase
Reg regBase
Definition: X86DisassemblerDecoder.h:618
readVVVV
static int readVVVV(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:1467
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
llvm::X86Disassembler::MAP6
@ MAP6
Definition: X86DisassemblerDecoderCommon.h:301
uint32_t
translateDstIndex
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
Definition: X86Disassembler.cpp:1883
llvm::X86Disassembler::OperandSpecifier
The specification for how to extract and interpret one operand.
Definition: X86DisassemblerDecoderCommon.h:459
llvm::X86::BX_SI
@ BX_SI
Definition: X86Disassembler.cpp:1683
readOpcodeRegister
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
Definition: X86Disassembler.cpp:1380
llvm::X86Disassembler::InternalInstruction::repeatPrefix
uint8_t repeatPrefix
Definition: X86DisassemblerDecoder.h:556
translateRM
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
Definition: X86Disassembler.cpp:2212
llvm::X86Disassembler::InternalInstruction::eaDisplacement
EADisplacement eaDisplacement
Definition: X86DisassemblerDecoder.h:623
rFromREX
#define rFromREX(rex)
Definition: X86DisassemblerDecoder.h:32
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::X86Disassembler::VEX_PREFIX_66
@ VEX_PREFIX_66
Definition: X86DisassemblerDecoder.h:499
tryAddingPcLoadReferenceComment
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder)
tryAddingPcLoadReferenceComment - tries to add a comment as to what is being referenced by a load inst...
Definition: X86Disassembler.cpp:1838
ContextDecision::opcodeDecisions
OpcodeDecision opcodeDecisions[IC_max]
Definition: X86Disassembler.cpp:118
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:290
readOpcode
static bool readOpcode(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:875
llvm::X86Disassembler::InternalInstruction::readerCursor
uint64_t readerCursor
Definition: X86DisassemblerDecoder.h:523
TWOBYTE_SYM
#define TWOBYTE_SYM
Definition: X86DisassemblerDecoderCommon.h:27
llvm::X86Disassembler::InternalInstruction::mode
DisassemblerMode mode
Definition: X86DisassemblerDecoder.h:528
llvm::X86Disassembler
Definition: X86DisassemblerDecoderCommon.h:22
v2FromEVEX4of4
#define v2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:48
aaaFromEVEX4of4
#define aaaFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:49
REGS_ZMM
#define REGS_ZMM
Definition: X86DisassemblerDecoder.h:283
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:74
readDisplacement
static int readDisplacement(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:576
ALL_REGS
#define ALL_REGS
Definition: X86DisassemblerDecoder.h:397
llvm::X86Disassembler::InternalInstruction::hasOpSize
bool hasOpSize
Definition: X86DisassemblerDecoder.h:552
llvm::X86Disassembler::ATTR_XD
@ ATTR_XD
Definition: X86DisassemblerDecoderCommon.h:57
llvm::MCInstrInfo
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:25
Insn
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
Definition: AArch64MIPeepholeOpt.cpp:86
llvm::X86Disassembler::SEG_OVERRIDE_max
@ SEG_OVERRIDE_max
Definition: X86DisassemblerDecoder.h:478
REGS_YMM
#define REGS_YMM
Definition: X86DisassemblerDecoder.h:249
std
Definition: BitVector.h:838
XOPA_MAP_SYM
#define XOPA_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:32
translateRegister
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
Definition: X86Disassembler.cpp:1795
ALL_EA_BASES
#define ALL_EA_BASES
Definition: X86DisassemblerDecoder.h:388
uint16_t
translateRMMemory
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
Definition: X86Disassembler.cpp:2050
THREEBYTE38_SYM
#define THREEBYTE38_SYM
Definition: X86DisassemblerDecoderCommon.h:28
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
XOP9_MAP_SYM
#define XOP9_MAP_SYM
Definition: X86DisassemblerDecoderCommon.h:31
translateMaskRegister
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
Definition: X86Disassembler.cpp:2260
llvm::N86::EDI
@ EDI
Definition: X86MCTargetDesc.h:51
Success
#define Success
Definition: AArch64Disassembler.cpp:266
llvm::X86Disassembler::OperandSpecifier::type
uint8_t type
Definition: X86DisassemblerDecoderCommon.h:461
lFromEVEX4of4
#define lFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:46
llvm::X86::IP_HAS_REPEAT_NE
@ IP_HAS_REPEAT_NE
Definition: X86BaseInfo.h:60
llvm::MCInst::getOpcode
unsigned getOpcode() const
Definition: MCInst.h:198
llvm::X86Disassembler::TWOBYTE
@ TWOBYTE
Definition: X86DisassemblerDecoderCommon.h:293
llvm::X86::BP_SI
@ BP_SI
Definition: X86Disassembler.cpp:1685
llvm::X86Disassembler::TYPE_VEX_3B
@ TYPE_VEX_3B
Definition: X86DisassemblerDecoder.h:507
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:414
wFromXOP3of3
#define wFromXOP3of3(xop)
Definition: X86DisassemblerDecoder.h:69
REGS_XMM
#define REGS_XMM
Definition: X86DisassemblerDecoder.h:215
llvm::X86AS::GS
@ GS
Definition: X86.h:187
THREEBYTE3A_SYM
#define THREEBYTE3A_SYM
Definition: X86DisassemblerDecoderCommon.h:29
llvm::X86Disassembler::EA_DISP_8
@ EA_DISP_8
Definition: X86DisassemblerDecoder.h:456
X86BaseInfo.h
llvm::X86::sib64
@ sib64
Definition: X86Disassembler.cpp:1688
llvm::X86Disassembler::DisassemblerMode
DisassemblerMode
Decoding mode for the Intel disassembler.
Definition: X86DisassemblerDecoderCommon.h:469
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::X86Disassembler::InternalInstruction::mandatoryPrefix
uint8_t mandatoryPrefix
Definition: X86DisassemblerDecoder.h:537
l2FromEVEX4of4
#define l2FromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:45
llvm::X86Disassembler::InternalInstruction::sibIndexBase
SIBIndex sibIndexBase
Definition: X86DisassemblerDecoder.h:628
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
INSTRUCTIONS_SYM
#define INSTRUCTIONS_SYM
Definition: X86DisassemblerDecoderCommon.h:24
llvm::X86Disassembler::InternalInstruction::sibScale
uint8_t sibScale
Definition: X86DisassemblerDecoder.h:630
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:148
is16BitEquivalent
static bool is16BitEquivalent(const char *orig, const char *equiv)
Definition: X86Disassembler.cpp:1000
consume
static bool consume(InternalInstruction *insn, T &ptr)
Definition: X86Disassembler.cpp:192
bFromEVEX4of4
#define bFromEVEX4of4(evex)
Definition: X86DisassemblerDecoder.h:47
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
CASE_ENCODING_VSIB
#define CASE_ENCODING_VSIB
Definition: X86DisassemblerDecoderCommon.h:352
xFromEVEX2of4
#define xFromEVEX2of4(evex)
Definition: X86DisassemblerDecoder.h:37
llvm::X86Disassembler::OperandType
OperandType
Definition: X86DisassemblerDecoderCommon.h:452
llvm::X86Disassembler::VEX_LOB_MAP5
@ VEX_LOB_MAP5
Definition: X86DisassemblerDecoder.h:486
xFromVEX2of3
#define xFromVEX2of3(vex)
Definition: X86DisassemblerDecoder.h:52
llvm::X86Disassembler::THREEDNOW_MAP
@ THREEDNOW_MAP
Definition: X86DisassemblerDecoderCommon.h:299
llvm::X86Disassembler::InstrUID
uint16_t InstrUID
Definition: X86DisassemblerDecoderCommon.h:311
llvm::X86Disassembler::SEG_OVERRIDE_DS
@ SEG_OVERRIDE_DS
Definition: X86DisassemblerDecoder.h:474
llvm::X86Disassembler::InstructionSpecifier
The specification for how to extract and interpret a full instruction and its operands.
Definition: X86DisassemblerDecoder.h:514
readPrefixes
static int readPrefixes(struct InternalInstruction *insn)
Definition: X86Disassembler.cpp:214
raw_ostream.h
llvm::X86Disassembler::InternalInstruction::registerSize
uint8_t registerSize
Definition: X86DisassemblerDecoder.h:559
indexFromSIB
#define indexFromSIB(sib)
Definition: X86DisassemblerDecoder.h:29
segmentRegnums
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Definition: X86Disassembler.cpp:1844
llvm::X86Disassembler::ATTR_64BIT
@ ATTR_64BIT
Definition: X86DisassemblerDecoderCommon.h:55
X86
Unrolling by would eliminate the &in both leading to a net reduction in code size The resultant code would then also be suitable for exit value computation We miss a bunch of rotate opportunities on various including etc On X86
Definition: README.txt:568
X86TargetInfo.h
TargetRegistry.h
ModRMDecision::modrm_type
uint8_t modrm_type
Definition: X86Disassembler.cpp:102
MCExpr.h
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::X86Disassembler::THREEBYTE_3A
@ THREEBYTE_3A
Definition: X86DisassemblerDecoderCommon.h:295
llvm::X86Disassembler::ATTR_NONE
@ ATTR_NONE
Definition: X86DisassemblerDecoderCommon.h:54
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::MCInst::clear
void clear()
Definition: MCInst.h:215
llvm::X86Disassembler::InternalInstruction::immediateSize
uint8_t immediateSize
Definition: X86DisassemblerDecoder.h:562
llvm::X86Disassembler::InternalInstruction::vectorExtensionPrefix
uint8_t vectorExtensionPrefix[4]
Definition: X86DisassemblerDecoder.h:539
llvm::X86Disassembler::InternalInstruction::sibBase
SIBBase sibBase
Definition: X86DisassemblerDecoder.h:631