LLVM  10.0.0svn
X86Disassembler.cpp
Go to the documentation of this file.
1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 //
13 //
14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15 // 64-bit X86 instruction sets. The main decode sequence for an assembly
16 // instruction in this disassembler is:
17 //
18 // 1. Read the prefix bytes and determine the attributes of the instruction.
19 // These attributes, recorded in enum attributeBits
20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21 // provides a mapping from bitmasks to contexts, which are represented by
22 // enum InstructionContext (ibid.).
23 //
24 // 2. Read the opcode, and determine what kind of opcode it is. The
25 // disassembler distinguishes four kinds of opcodes, which are enumerated in
26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
29 //
30 // 3. Depending on the opcode type, look in one of four ClassDecision structures
31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32 // OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
33 // a ModRMDecision (ibid.).
34 //
35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39 // ModR/M byte is required and how to interpret it.
40 //
41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44 // meanings of its operands.
45 //
46 // 6. For each operand, its encoding is an entry from OperandEncoding
47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
48 // OperandType (ibid.). The encoding indicates how to read it from the
49 // instruction; the type indicates how to interpret the value once it has
50 // been read. For example, a register operand could be stored in the R/M
51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52 // the main opcode. This is orthogonal from its meaning (a GPR or an XMM
53 // register, for instance). Given this information, the operands can be
54 // extracted and interpreted.
55 //
56 // 7. As the last step, the disassembler translates the instruction information
57 // and operands into a format understandable by the client - in this case, an
58 // MCInst for use by the MC infrastructure.
59 //
60 // The disassembler is broken broadly into two parts: the table emitter that
61 // emits the instruction decode tables discussed above during compilation, and
62 // the disassembler itself. The table emitter is documented in more detail in
63 // utils/TableGen/X86DisassemblerEmitter.h.
64 //
65 // X86Disassembler.cpp contains the code responsible for step 7, and for
66 // invoking the decoder to execute steps 1-6.
67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68 // table emitter and the disassembler.
69 // X86DisassemblerDecoder.h contains the public interface of the decoder,
70 // factored out into C for possible use by other projects.
71 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
72 // responsible for steps 1-6.
73 //
74 //===----------------------------------------------------------------------===//
75 
79 #include "X86DisassemblerDecoder.h"
80 #include "llvm/MC/MCContext.h"
82 #include "llvm/MC/MCExpr.h"
83 #include "llvm/MC/MCInst.h"
84 #include "llvm/MC/MCInstrInfo.h"
86 #include "llvm/Support/Debug.h"
89 
90 using namespace llvm;
91 using namespace llvm::X86Disassembler;
92 
93 #define DEBUG_TYPE "x86-disassembler"
94 
95 void llvm::X86Disassembler::Debug(const char *file, unsigned line,
96  const char *s) {
97  dbgs() << file << ":" << line << ": " << s;
98 }
99 
101  const void *mii) {
102  const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
103  return MII->getName(Opcode);
104 }
105 
106 #define debug(s) LLVM_DEBUG(Debug(__FILE__, __LINE__, s));
107 
namespace llvm {
namespace X86 {

// Placeholder register numbers. Nothing ever stores these values; they only
// exist so that an automatically-generated switch statement that names them
// compiles.
enum {
  BX_SI = 500,
  BX_DI, // 501
  BP_SI, // 502
  BP_DI, // 503
  sib,   // 504
  sib64  // 505
};

} // end namespace X86
} // end namespace llvm
125 
// Forward declaration: X86GenericDisassembler::getInstruction() calls
// translateInstruction() before its definition, which appears later in this
// file. Returns false on success and true on failure.
126 static bool translateInstruction(MCInst &target,
127  InternalInstruction &source,
128  const MCDisassembler *Dis);
129 
130 namespace {
131 
132 /// Generic disassembler for all X86 platforms. All each platform class should
133 /// have to do is subclass the constructor, and provide a different
134 /// disassemblerMode value.
135 class X86GenericDisassembler : public MCDisassembler {
136  std::unique_ptr<const MCInstrInfo> MII;
137 public:
138  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
139  std::unique_ptr<const MCInstrInfo> MII);
140 public:
141  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
142  ArrayRef<uint8_t> Bytes, uint64_t Address,
143  raw_ostream &vStream,
144  raw_ostream &cStream) const override;
145 
146 private:
147  DisassemblerMode fMode;
148 };
149 
150 }
151 
152 X86GenericDisassembler::X86GenericDisassembler(
153  const MCSubtargetInfo &STI,
154  MCContext &Ctx,
155  std::unique_ptr<const MCInstrInfo> MII)
156  : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
157  const FeatureBitset &FB = STI.getFeatureBits();
158  if (FB[X86::Mode16Bit]) {
159  fMode = MODE_16BIT;
160  return;
161  } else if (FB[X86::Mode32Bit]) {
162  fMode = MODE_32BIT;
163  return;
164  } else if (FB[X86::Mode64Bit]) {
165  fMode = MODE_64BIT;
166  return;
167  }
168 
169  llvm_unreachable("Invalid CPU mode");
170 }
171 
172 namespace {
173 struct Region {
174  ArrayRef<uint8_t> Bytes;
175  uint64_t Base;
176  Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
177 };
178 } // end anonymous namespace
179 
180 /// A callback function that wraps the readByte method from Region.
181 ///
182 /// @param Arg - The generic callback parameter. In this case, this should
183 /// be a pointer to a Region.
184 /// @param Byte - A pointer to the byte to be read.
185 /// @param Address - The address to be read.
186 static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) {
187  auto *R = static_cast<const Region *>(Arg);
188  ArrayRef<uint8_t> Bytes = R->Bytes;
189  unsigned Index = Address - R->Base;
190  if (Bytes.size() <= Index)
191  return -1;
192  *Byte = Bytes[Index];
193  return 0;
194 }
195 
196 /// logger - a callback function that wraps the operator<< method from
197 /// raw_ostream.
198 ///
199 /// @param arg - The generic callback parameter. This should be a pointe
200 /// to a raw_ostream.
201 /// @param log - A string to be logged. logger() adds a newline.
202 static void logger(void* arg, const char* log) {
203  if (!arg)
204  return;
205 
206  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
207  vStream << log << "\n";
208 }
209 
210 //
211 // Public interface for the disassembler
212 //
213 
214 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
215  MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
216  raw_ostream &VStream, raw_ostream &CStream) const {
217  CommentStream = &CStream;
218 
219  InternalInstruction InternalInstr;
220 
221  dlog_t LoggerFn = logger;
222  if (&VStream == &nulls())
223  LoggerFn = nullptr; // Disable logging completely if it's going to nulls().
224 
225  Region R(Bytes, Address);
226 
227  int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R,
228  LoggerFn, (void *)&VStream,
229  (const void *)MII.get(), Address, fMode);
230 
231  if (Ret) {
232  Size = InternalInstr.readerCursor - Address;
233  return Fail;
234  } else {
235  Size = InternalInstr.length;
236  bool Ret = translateInstruction(Instr, InternalInstr, this);
237  if (!Ret) {
238  unsigned Flags = X86::IP_NO_PREFIX;
239  if (InternalInstr.hasAdSize)
240  Flags |= X86::IP_HAS_AD_SIZE;
241  if (!InternalInstr.mandatoryPrefix) {
242  if (InternalInstr.hasOpSize)
243  Flags |= X86::IP_HAS_OP_SIZE;
244  if (InternalInstr.repeatPrefix == 0xf2)
245  Flags |= X86::IP_HAS_REPEAT_NE;
246  else if (InternalInstr.repeatPrefix == 0xf3 &&
247  // It should not be 'pause' f3 90
248  InternalInstr.opcode != 0x90)
249  Flags |= X86::IP_HAS_REPEAT;
250  if (InternalInstr.hasLockPrefix)
251  Flags |= X86::IP_HAS_LOCK;
252  }
253  Instr.setFlags(Flags);
254  }
255  return (!Ret) ? Success : Fail;
256  }
257 }
258 
259 //
260 // Private code that translates from struct InternalInstructions to MCInsts.
261 //
262 
263 /// translateRegister - Translates an internal register to the appropriate LLVM
264 /// register, and appends it as an operand to an MCInst.
265 ///
266 /// @param mcInst - The MCInst to append to.
267 /// @param reg - The Reg to append.
268 static void translateRegister(MCInst &mcInst, Reg reg) {
269 #define ENTRY(x) X86::x,
270  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
271 #undef ENTRY
272 
273  MCPhysReg llvmRegnum = llvmRegnums[reg];
274  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
275 }
276 
277 /// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
278 /// immediate Value in the MCInst.
279 ///
280 /// @param Value - The immediate Value, has had any PC adjustment made by
281 /// the caller.
282 /// @param isBranch - If the instruction is a branch instruction
283 /// @param Address - The starting address of the instruction
284 /// @param Offset - The byte offset to this immediate in the instruction
285 /// @param Width - The byte width of this immediate in the instruction
/// @param MI - The MCInst that receives the operand
/// @param Dis - The disassembler whose tryAddingSymbolicOperand() method
/// does the actual work
286 ///
287 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
288 /// called then that function is called to get any symbolic information for the
289 /// immediate in the instruction using the Address, Offset and Width. If that
290 /// returns non-zero then the symbolic information it returns is used to create
291 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
292 /// returns zero and isBranch is true then a symbol look up for immediate Value
293 /// is done and if a symbol is found an MCExpr is created with that, else
294 /// an MCExpr with the immediate Value is created. This function returns true
295 /// if it adds an operand to the MCInst and false otherwise.
296 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
297  uint64_t Address, uint64_t Offset,
298  uint64_t Width, MCInst &MI,
299  const MCDisassembler *Dis) {
// Pure forwarding wrapper; note that the member function takes its
// arguments in a different order (MI first, isBranch after Address).
300  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
301  Offset, Width);
302 }
303 
304 /// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being
305 /// referenced by a load instruction with the base register that is the rip.
306 /// These can often be addresses in a literal pool. The Address of the
307 /// instruction and its immediate Value are used to determine the address
308 /// being referenced in the literal pool entry. The SymbolLookUp call back will
309 /// return a pointer to a literal 'C' string if the referenced address is an
310 /// address into a section with 'C' string literals.
///
/// @param Address - The address of the instruction.
/// @param Value - The immediate value of the instruction.
/// @param Decoder - Opaque pointer that must refer to an MCDisassembler.
311 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
312  const void *Decoder) {
313  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
// The member function takes (Value, Address), the reverse of this wrapper's
// parameter order.
314  Dis->tryAddingPcLoadReferenceComment(Value, Address);
315 }
316 
// Lookup table from a segment-override value to the LLVM segment register
// number. It is indexed with insn.segmentOverride by the operand translators
// in this file; the entry for "no override" is register number 0.
317 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
318  0, // SEG_OVERRIDE_NONE
319  X86::CS,
320  X86::SS,
321  X86::DS,
322  X86::ES,
323  X86::FS,
324  X86::GS
325 };
326 
327 /// translateSrcIndex - Appends a source index operand to an MCInst.
328 ///
329 /// @param mcInst - The MCInst to append to.
330 /// @param insn - The internal instruction.
331 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
332  unsigned baseRegNo;
333 
334  if (insn.mode == MODE_64BIT)
335  baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
336  else if (insn.mode == MODE_32BIT)
337  baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
338  else {
339  assert(insn.mode == MODE_16BIT);
340  baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
341  }
342  MCOperand baseReg = MCOperand::createReg(baseRegNo);
343  mcInst.addOperand(baseReg);
344 
345  MCOperand segmentReg;
346  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
347  mcInst.addOperand(segmentReg);
348  return false;
349 }
350 
351 /// translateDstIndex - Appends a destination index operand to an MCInst.
352 ///
353 /// @param mcInst - The MCInst to append to.
354 /// @param insn - The internal instruction.
355 
356 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
357  unsigned baseRegNo;
358 
359  if (insn.mode == MODE_64BIT)
360  baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
361  else if (insn.mode == MODE_32BIT)
362  baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
363  else {
364  assert(insn.mode == MODE_16BIT);
365  baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
366  }
367  MCOperand baseReg = MCOperand::createReg(baseRegNo);
368  mcInst.addOperand(baseReg);
369  return false;
370 }
371 
372 /// translateImmediate - Appends an immediate operand to an MCInst.
373 ///
374 /// @param mcInst - The MCInst to append to.
375 /// @param immediate - The immediate value to append.
376 /// @param operand - The operand, as stored in the descriptor table.
377 /// @param insn - The internal instruction.
378 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
379  const OperandSpecifier &operand,
380  InternalInstruction &insn,
381  const MCDisassembler *Dis) {
382  // Sign-extend the immediate if necessary.
383 
384  OperandType type = (OperandType)operand.type;
385 
386  bool isBranch = false;
387  uint64_t pcrel = 0;
388  if (type == TYPE_REL) {
389  isBranch = true;
390  pcrel = insn.startLocation +
391  insn.immediateOffset + insn.immediateSize;
392  switch (operand.encoding) {
393  default:
394  break;
395  case ENCODING_Iv:
396  switch (insn.displacementSize) {
397  default:
398  break;
399  case 1:
400  if(immediate & 0x80)
401  immediate |= ~(0xffull);
402  break;
403  case 2:
404  if(immediate & 0x8000)
405  immediate |= ~(0xffffull);
406  break;
407  case 4:
408  if(immediate & 0x80000000)
409  immediate |= ~(0xffffffffull);
410  break;
411  case 8:
412  break;
413  }
414  break;
415  case ENCODING_IB:
416  if(immediate & 0x80)
417  immediate |= ~(0xffull);
418  break;
419  case ENCODING_IW:
420  if(immediate & 0x8000)
421  immediate |= ~(0xffffull);
422  break;
423  case ENCODING_ID:
424  if(immediate & 0x80000000)
425  immediate |= ~(0xffffffffull);
426  break;
427  }
428  }
429  // By default sign-extend all X86 immediates based on their encoding.
430  else if (type == TYPE_IMM) {
431  switch (operand.encoding) {
432  default:
433  break;
434  case ENCODING_IB:
435  if(immediate & 0x80)
436  immediate |= ~(0xffull);
437  break;
438  case ENCODING_IW:
439  if(immediate & 0x8000)
440  immediate |= ~(0xffffull);
441  break;
442  case ENCODING_ID:
443  if(immediate & 0x80000000)
444  immediate |= ~(0xffffffffull);
445  break;
446  case ENCODING_IO:
447  break;
448  }
449  }
450 
451  switch (type) {
452  case TYPE_XMM:
453  mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
454  return;
455  case TYPE_YMM:
456  mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
457  return;
458  case TYPE_ZMM:
459  mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
460  return;
461  default:
462  // operand is 64 bits wide. Do nothing.
463  break;
464  }
465 
466  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
467  insn.immediateOffset, insn.immediateSize,
468  mcInst, Dis))
469  mcInst.addOperand(MCOperand::createImm(immediate));
470 
471  if (type == TYPE_MOFFS) {
472  MCOperand segmentReg;
473  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
474  mcInst.addOperand(segmentReg);
475  }
476 }
477 
478 /// translateRMRegister - Translates a register stored in the R/M field of the
479 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
480 /// @param mcInst - The MCInst to append to.
481 /// @param insn - The internal instruction to extract the R/M field
482 /// from.
483 /// @return - false on success; true otherwise
484 static bool translateRMRegister(MCInst &mcInst,
485  InternalInstruction &insn) {
486  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
487  debug("A R/M register operand may not have a SIB byte");
488  return true;
489  }
490 
491  switch (insn.eaBase) {
492  default:
493  debug("Unexpected EA base register");
494  return true;
495  case EA_BASE_NONE:
496  debug("EA_BASE_NONE for ModR/M base");
497  return true;
498 #define ENTRY(x) case EA_BASE_##x:
500 #undef ENTRY
501  debug("A R/M register operand may not have a base; "
502  "the operand must be a register.");
503  return true;
504 #define ENTRY(x) \
505  case EA_REG_##x: \
506  mcInst.addOperand(MCOperand::createReg(X86::x)); break;
507  ALL_REGS
508 #undef ENTRY
509  }
510 
511  return false;
512 }
513 
514 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
515 /// fields of an internal instruction (and possibly its SIB byte) to a memory
516 /// operand in LLVM's format, and appends it to an MCInst.
517 ///
518 /// @param mcInst - The MCInst to append to.
519 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
520 /// from.
521 /// @return - false on success; true otherwise
522 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
523  const MCDisassembler *Dis) {
524  // Addresses in an MCInst are represented as five operands:
525  // 1. basereg (register) The R/M base, or (if there is a SIB) the
526  // SIB base
527  // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
528  // scale amount
529  // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
530  // the index (which is multiplied by the
531  // scale amount)
532  // 4. displacement (immediate) 0, or the displacement if there is one
533  // 5. segmentreg (register) x86_registerNONE for now, but could be set
534  // if we have segment overrides
535 
536  MCOperand baseReg;
537  MCOperand scaleAmount;
538  MCOperand indexReg;
539  MCOperand displacement;
540  MCOperand segmentReg;
541  uint64_t pcrel = 0;
542 
543  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
544  if (insn.sibBase != SIB_BASE_NONE) {
545  switch (insn.sibBase) {
546  default:
547  debug("Unexpected sibBase");
548  return true;
549 #define ENTRY(x) \
550  case SIB_BASE_##x: \
551  baseReg = MCOperand::createReg(X86::x); break;
553 #undef ENTRY
554  }
555  } else {
556  baseReg = MCOperand::createReg(X86::NoRegister);
557  }
558 
559  if (insn.sibIndex != SIB_INDEX_NONE) {
560  switch (insn.sibIndex) {
561  default:
562  debug("Unexpected sibIndex");
563  return true;
564 #define ENTRY(x) \
565  case SIB_INDEX_##x: \
566  indexReg = MCOperand::createReg(X86::x); break;
569  REGS_XMM
570  REGS_YMM
571  REGS_ZMM
572 #undef ENTRY
573  }
574  } else {
575  // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
576  // but no index is used and modrm alone should have been enough.
577  // -No base register in 32-bit mode. In 64-bit mode this is used to
578  // avoid rip-relative addressing.
579  // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
580  // base always requires a SIB byte.
581  // -A scale other than 1 is used.
582  if (insn.sibScale != 1 ||
583  (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
584  (insn.sibBase != SIB_BASE_NONE &&
585  insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
586  insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12)) {
587  indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
588  X86::RIZ);
589  } else
590  indexReg = MCOperand::createReg(X86::NoRegister);
591  }
592 
593  scaleAmount = MCOperand::createImm(insn.sibScale);
594  } else {
595  switch (insn.eaBase) {
596  case EA_BASE_NONE:
597  if (insn.eaDisplacement == EA_DISP_NONE) {
598  debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
599  return true;
600  }
601  if (insn.mode == MODE_64BIT){
602  pcrel = insn.startLocation +
605  insn.displacementOffset,
606  insn.displacement + pcrel, Dis);
607  // Section 2.2.1.6
608  baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
609  X86::RIP);
610  }
611  else
612  baseReg = MCOperand::createReg(X86::NoRegister);
613 
614  indexReg = MCOperand::createReg(X86::NoRegister);
615  break;
616  case EA_BASE_BX_SI:
617  baseReg = MCOperand::createReg(X86::BX);
618  indexReg = MCOperand::createReg(X86::SI);
619  break;
620  case EA_BASE_BX_DI:
621  baseReg = MCOperand::createReg(X86::BX);
622  indexReg = MCOperand::createReg(X86::DI);
623  break;
624  case EA_BASE_BP_SI:
625  baseReg = MCOperand::createReg(X86::BP);
626  indexReg = MCOperand::createReg(X86::SI);
627  break;
628  case EA_BASE_BP_DI:
629  baseReg = MCOperand::createReg(X86::BP);
630  indexReg = MCOperand::createReg(X86::DI);
631  break;
632  default:
633  indexReg = MCOperand::createReg(X86::NoRegister);
634  switch (insn.eaBase) {
635  default:
636  debug("Unexpected eaBase");
637  return true;
638  // Here, we will use the fill-ins defined above. However,
639  // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
640  // sib and sib64 were handled in the top-level if, so they're only
641  // placeholders to keep the compiler happy.
642 #define ENTRY(x) \
643  case EA_BASE_##x: \
644  baseReg = MCOperand::createReg(X86::x); break;
646 #undef ENTRY
647 #define ENTRY(x) case EA_REG_##x:
648  ALL_REGS
649 #undef ENTRY
650  debug("A R/M memory operand may not be a register; "
651  "the base field must be a base.");
652  return true;
653  }
654  }
655 
656  scaleAmount = MCOperand::createImm(1);
657  }
658 
659  displacement = MCOperand::createImm(insn.displacement);
660 
661  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
662 
663  mcInst.addOperand(baseReg);
664  mcInst.addOperand(scaleAmount);
665  mcInst.addOperand(indexReg);
666  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
668  insn.displacementSize, mcInst, Dis))
669  mcInst.addOperand(displacement);
670  mcInst.addOperand(segmentReg);
671  return false;
672 }
673 
674 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
675 /// byte of an instruction to LLVM form, and appends it to an MCInst.
676 ///
677 /// @param mcInst - The MCInst to append to.
678 /// @param operand - The operand, as stored in the descriptor table.
679 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
680 /// from.
681 /// @return - 0 on success; nonzero otherwise
682 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
683  InternalInstruction &insn, const MCDisassembler *Dis) {
684  switch (operand.type) {
685  default:
686  debug("Unexpected type for a R/M operand");
687  return true;
688  case TYPE_R8:
689  case TYPE_R16:
690  case TYPE_R32:
691  case TYPE_R64:
692  case TYPE_Rv:
693  case TYPE_MM64:
694  case TYPE_XMM:
695  case TYPE_YMM:
696  case TYPE_ZMM:
697  case TYPE_VK_PAIR:
698  case TYPE_VK:
699  case TYPE_DEBUGREG:
700  case TYPE_CONTROLREG:
701  case TYPE_BNDR:
702  return translateRMRegister(mcInst, insn);
703  case TYPE_M:
704  case TYPE_MVSIBX:
705  case TYPE_MVSIBY:
706  case TYPE_MVSIBZ:
707  return translateRMMemory(mcInst, insn, Dis);
708  }
709 }
710 
711 /// translateFPRegister - Translates a stack position on the FPU stack to its
712 /// LLVM form, and appends it to an MCInst.
713 ///
714 /// @param mcInst - The MCInst to append to.
715 /// @param stackPos - The stack position to translate.
716 static void translateFPRegister(MCInst &mcInst,
717  uint8_t stackPos) {
718  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
719 }
720 
721 /// translateMaskRegister - Translates a 3-bit mask register number to
722 /// LLVM form, and appends it to an MCInst.
723 ///
724 /// @param mcInst - The MCInst to append to.
725 /// @param maskRegNum - Number of mask register from 0 to 7.
726 /// @return - false on success; true otherwise.
727 static bool translateMaskRegister(MCInst &mcInst,
728  uint8_t maskRegNum) {
729  if (maskRegNum >= 8) {
730  debug("Invalid mask register number");
731  return true;
732  }
733 
734  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
735  return false;
736 }
737 
738 /// translateOperand - Translates an operand stored in an internal instruction
739 /// to LLVM's format and appends it to an MCInst.
740 ///
741 /// @param mcInst - The MCInst to append to.
742 /// @param operand - The operand, as stored in the descriptor table.
743 /// @param insn - The internal instruction.
744 /// @return - false on success; true otherwise.
745 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
746  InternalInstruction &insn,
747  const MCDisassembler *Dis) {
748  switch (operand.encoding) {
749  default:
750  debug("Unhandled operand encoding during translation");
751  return true;
752  case ENCODING_REG:
753  translateRegister(mcInst, insn.reg);
754  return false;
755  case ENCODING_WRITEMASK:
756  return translateMaskRegister(mcInst, insn.writemask);
759  return translateRM(mcInst, operand, insn, Dis);
760  case ENCODING_IB:
761  case ENCODING_IW:
762  case ENCODING_ID:
763  case ENCODING_IO:
764  case ENCODING_Iv:
765  case ENCODING_Ia:
766  translateImmediate(mcInst,
767  insn.immediates[insn.numImmediatesTranslated++],
768  operand,
769  insn,
770  Dis);
771  return false;
772  case ENCODING_IRC:
773  mcInst.addOperand(MCOperand::createImm(insn.RC));
774  return false;
775  case ENCODING_SI:
776  return translateSrcIndex(mcInst, insn);
777  case ENCODING_DI:
778  return translateDstIndex(mcInst, insn);
779  case ENCODING_RB:
780  case ENCODING_RW:
781  case ENCODING_RD:
782  case ENCODING_RO:
783  case ENCODING_Rv:
784  translateRegister(mcInst, insn.opcodeRegister);
785  return false;
786  case ENCODING_CC:
787  mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
788  return false;
789  case ENCODING_FP:
790  translateFPRegister(mcInst, insn.modRM & 7);
791  return false;
792  case ENCODING_VVVV:
793  translateRegister(mcInst, insn.vvvv);
794  return false;
795  case ENCODING_DUP:
796  return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
797  insn, Dis);
798  }
799 }
800 
801 /// translateInstruction - Translates an internal instruction and all its
802 /// operands to an MCInst.
803 ///
804 /// @param mcInst - The MCInst to populate with the instruction's data.
805 /// @param insn - The internal instruction.
806 /// @return - false on success; true otherwise.
807 static bool translateInstruction(MCInst &mcInst,
808  InternalInstruction &insn,
809  const MCDisassembler *Dis) {
810  if (!insn.spec) {
811  debug("Instruction has no specification");
812  return true;
813  }
814 
815  mcInst.clear();
816  mcInst.setOpcode(insn.instructionID);
817  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
818  // prefix bytes should be disassembled as xrelease and xacquire then set the
819  // opcode to those instead of the rep and repne opcodes.
820  if (insn.xAcquireRelease) {
821  if(mcInst.getOpcode() == X86::REP_PREFIX)
822  mcInst.setOpcode(X86::XRELEASE_PREFIX);
823  else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
824  mcInst.setOpcode(X86::XACQUIRE_PREFIX);
825  }
826 
827  insn.numImmediatesTranslated = 0;
828 
829  for (const auto &Op : insn.operands) {
830  if (Op.encoding != ENCODING_NONE) {
831  if (translateOperand(mcInst, Op, insn, Dis)) {
832  return true;
833  }
834  }
835  }
836 
837  return false;
838 }
839 
841  const MCSubtargetInfo &STI,
842  MCContext &Ctx) {
843  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
844  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
845 }
846 
847 extern "C" void LLVMInitializeX86Disassembler() {
848  // Register the disassembler.
853 }
*ViewGraph Emit a dot run run gv on the postscript file
Definition: GraphWriter.h:362
void(* dlog_t)(void *arg, const char *log)
Type for the logging function that the consumer can provide to get debugging output from the decoder...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void clear()
Definition: MCInst.h:188
DecodeStatus
Ternary decode status.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) const
MCInstrInfo * createMCInstrInfo() const
createMCInstrInfo - Create a MCInstrInfo implementation.
#define ALL_REGS
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
Definition: BitVector.h:937
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:115
#define Fail
const FeatureBitset & getFeatureBits() const
Reg
All possible values of the reg field in the ModR/M byte.
#define ALL_EA_BASES
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst...
#define EA_BASES_64BIT
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form, and appends it to an MCInst.
Context object for machine code objects.
Definition: MCContext.h:65
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register, and appends it as an operand to an MCInst.
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder)
tryAddingPcLoadReferenceComment - trys to add a comment as to what is being referenced by a load inst...
int decodeInstruction(InternalInstruction *insn, byteReader_t reader, const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, uint64_t startLoc, DisassemblerMode mode)
Decode one instruction and store the decoding results in a buffer provided by the consumer...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:158
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
The specification for how to extract and interpret one operand.
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Container class for subtarget features.
static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address)
A callback function that wraps the readByte method from Region.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
#define ALL_SIB_BASES
#define CASE_ENCODING_RM
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
void Debug(const char *file, unsigned line, const char *s)
Print a message to debugs()
The x86 internal instruction, which is produced by the decoder.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
StringRef GetInstrName(unsigned Opcode, const void *mii)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setFlags(unsigned F)
Definition: MCInst.h:173
void setOpcode(unsigned Op)
Definition: MCInst.h:170
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1158
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
#define CASE_ENCODING_VSIB
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void LLVMInitializeX86Disassembler()
#define debug(s)
Target - Wrapper for Target specific information.
#define Success
#define REGS_ZMM
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Generic base class for all target subtargets.
#define REGS_XMM
static void logger(void *arg, const char *log)
logger - a callback function that wraps the operator<< method from raw_ostream.
Target & getTheX86_32Target()
uint32_t Size
Definition: Profile.cpp:46
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isBranch(unsigned Opcode)
LLVM Value Representation.
Definition: Value.h:73
raw_ostream & nulls()
This returns a reference to a raw_ostream which simply discards output.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
IRTranslator LLVM IR MI
void addOperand(const MCOperand &Op)
Definition: MCInst.h:183
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, uint64_t Width, MCInst &MI, const MCDisassembler *Dis)
tryAddingSymbolicOperand - tries to add a symbolic operand in place of the immediate Value in the MCIn...
#define REGS_YMM
unsigned getOpcode() const
Definition: MCInst.h:171
#define EA_BASES_32BIT
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:34
Target & getTheX86_64Target()
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:122
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
DisassemblerMode
Decoding mode for the Intel disassembler.