94#define DEBUG_TYPE "x86-disassembler"
96#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
121#include "X86GenDisassemblerTables.inc"
124 uint8_t opcode, uint8_t modRM) {
129 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
132 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
135 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
138 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
141 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
144 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
147 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
154 dec = &
MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
157 dec = &
MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
173 return modRMTable[dec->
instructionIDs + ((modRM & 0x38) >> 3) + 8];
175 case MODRM_SPLITMISC:
188 byte = insn->
bytes[offset];
193 auto r = insn->
bytes;
195 if (offset +
sizeof(
T) > r.size())
203 return insn->
mode ==
MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
229 if ((
byte == 0xf2 ||
byte == 0xf3) && !
peek(insn,
nextByte)) {
238 if (!(
byte == 0xf3 &&
nextByte == 0x90))
256 if (
peek(insn, nnextByte))
326 uint8_t byte1, byte2;
332 if (
peek(insn, byte2)) {
338 ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
368 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
372 }
else if (
byte == 0xc4) {
374 if (
peek(insn, byte1)) {
403 }
else if (
byte == 0xc5) {
405 if (
peek(insn, byte1)) {
435 }
else if (
byte == 0x8f) {
437 if (
peek(insn, byte1)) {
442 if ((byte1 & 0x38) != 0x0)
474 }
else if (
isREX(insn,
byte)) {
521 sibBaseBase = SIB_BASE_EAX;
525 sibBaseBase = SIB_BASE_RAX;
604 uint8_t mod, rm, reg, evexrm;
649 EABase eaBaseBase = EA_BASE_BX_SI;
718 insn->
eaBase = EA_BASE_sib;
741#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
742 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
743 uint8_t index, uint8_t *valid) { \
747 debug("Unhandled register type"); \
751 return base + index; \
756 if (insn->rexPrefix && index >= 4 && index <= 7) { \
757 return prefix##_SPL + (index - 4); \
759 return prefix##_AL + index; \
765 return prefix##_AX + index; \
770 return prefix##_EAX + index; \
775 return prefix##_RAX + index; \
777 return prefix##_ZMM0 + index; \
779 return prefix##_YMM0 + index; \
781 return prefix##_XMM0 + index; \
785 return prefix##_TMM0 + index; \
790 return prefix##_K0 + index; \
794 return prefix##_K0_K1 + (index / 2); \
796 return prefix##_MM0 + (index & 0x7); \
797 case TYPE_SEGMENTREG: \
798 if ((index & 7) > 5) \
800 return prefix##_ES + (index & 7); \
801 case TYPE_DEBUGREG: \
802 return prefix##_DR0 + index; \
803 case TYPE_CONTROLREG: \
804 return prefix##_CR0 + index; \
806 return prefix##_XMM0 + index; \
808 return prefix##_YMM0 + index; \
810 return prefix##_ZMM0 + index; \
841 debug(
"Expected a REG or R/M encoding in fixupReg");
851 insn->reg - insn->regBase, &valid);
857 if (insn->eaBase >= insn->eaRegBase) {
858 insn->eaBase = (
EABase)fixupRMValue(
859 insn, (
OperandType)
op->type, insn->eaBase - insn->eaRegBase, &valid);
880 dbgs() <<
format(
"Unhandled mmm field for instruction (0x%hhx)",
903 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
929 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
947 if (current == 0x0f) {
949 dbgs() <<
format(
"Found a two-byte escape prefix (0x%hhx)", current));
953 if (current == 0x38) {
960 }
else if (current == 0x3a) {
967 }
else if (current == 0x0f) {
969 dbgs() <<
format(
"Found a 3dnow escape prefix (0x%hhx)", current));
997 for (
int i = 0;; i++) {
998 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
1000 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
1002 if (orig[i] != equiv[i]) {
1003 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
1005 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
1007 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
1016 for (
int i = 0;; ++i) {
1017 if (
name[i] ==
'\0')
1019 if (
name[i] ==
'6' &&
name[i + 1] ==
'4')
1207 attrMask &= ~ATTR_ADSIZE;
1252 auto SpecName = mii->
getName(instructionIDWithREXW);
1254 if (!
is64Bit(SpecName.data())) {
1315 specName = mii->
getName(instructionID);
1316 specWithOpSizeName = mii->
getName(instructionIDWithOpsize);
1334 uint16_t instructionIDWithNewOpcode;
1357 insn->
spec = specWithNewOpcode;
1399 (
Reg)(MODRM_REG_EAX +
1404 (
Reg)(MODRM_REG_RAX +
1482 insn->
vvvv =
static_cast<Reg>(vvvv);
1504 int hasVVVV, needVVVV;
1511 needVVVV = hasVVVV && (insn->
vvvv != 0);
1513 for (
const auto &Op : x86OperandSets[insn->
spec->
operands]) {
1514 switch (Op.encoding) {
1522 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1527 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1542 debug(
"Unhandled VSIB index type");
1560 insn->
displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1564 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1579 insn->
displacement *= 1 << (Op.encoding - ENCODING_RM);
1592 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1653 case ENCODING_WRITEMASK:
1660 LLVM_DEBUG(
dbgs() <<
"Encountered an operand with an unknown encoding.");
1700 std::unique_ptr<const MCInstrInfo> MII;
1703 std::unique_ptr<const MCInstrInfo> MII);
1715X86GenericDisassembler::X86GenericDisassembler(
1718 std::unique_ptr<const MCInstrInfo> MII)
1721 if (FB[X86::Is16Bit]) {
1724 }
else if (FB[X86::Is32Bit]) {
1727 }
else if (FB[X86::Is64Bit]) {
1738 CommentStream = &CStream;
1754 Insn.operands = x86OperandSets[
Insn.spec->operands];
1765 if (!
Insn.mandatoryPrefix) {
1768 if (
Insn.repeatPrefix == 0xf2)
1770 else if (
Insn.repeatPrefix == 0xf3 &&
1772 Insn.opcode != 0x90)
1774 if (
Insn.hasLockPrefix)
1792#define ENTRY(x) X86::x,
1796 MCPhysReg llvmRegnum = llvmRegnums[reg];
1818 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::RSI;
1820 baseRegNo = insn.
hasAdSize ? X86::SI : X86::ESI;
1823 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::SI;
1843 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::RDI;
1845 baseRegNo = insn.
hasAdSize ? X86::DI : X86::EDI;
1848 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::DI;
1871 if (type == TYPE_REL) {
1882 if(immediate & 0x80)
1883 immediate |= ~(0xffull);
1886 if(immediate & 0x8000)
1887 immediate |= ~(0xffffull);
1890 if(immediate & 0x80000000)
1891 immediate |= ~(0xffffffffull);
1898 if(immediate & 0x80)
1899 immediate |= ~(0xffull);
1902 if(immediate & 0x8000)
1903 immediate |= ~(0xffffull);
1906 if(immediate & 0x80000000)
1907 immediate |= ~(0xffffffffull);
1912 else if (type == TYPE_IMM) {
1917 if(immediate & 0x80)
1918 immediate |= ~(0xffull);
1921 if(immediate & 0x8000)
1922 immediate |= ~(0xffffull);
1925 if(immediate & 0x80000000)
1926 immediate |= ~(0xffffffffull);
1953 if (type == TYPE_MOFFS) {
1968 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
1969 debug(
"A R/M register operand may not have a SIB byte");
1975 debug(
"Unexpected EA base register");
1978 debug(
"EA_BASE_NONE for ModR/M base");
1980#define ENTRY(x) case EA_BASE_##x:
1983 debug(
"A R/M register operand may not have a base; "
1984 "the operand must be a register.");
1988 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2007 bool ForceSIB =
false) {
2027 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2031 debug(
"Unexpected sibBase");
2034 case SIB_BASE_##x: \
2035 baseReg = MCOperand::createReg(X86::x); break;
2046 debug(
"Unexpected sibIndex");
2049 case SIB_INDEX_##x: \
2050 indexReg = MCOperand::createReg(X86::x); break;
2071 insn.
sibBase != SIB_BASE_R12D && insn.
sibBase != SIB_BASE_R12))) {
2083 debug(
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2120 debug(
"Unexpected eaBase");
2128 baseReg = MCOperand::createReg(X86::x); break;
2131#define ENTRY(x) case EA_REG_##x:
2134 debug(
"A R/M memory operand may not be a register; "
2135 "the base field must be a base.");
2151 const uint8_t dispSize =
2172 switch (operand.
type) {
2174 debug(
"Unexpected type for a R/M operand");
2189 case TYPE_CONTROLREG:
2219 uint8_t maskRegNum) {
2220 if (maskRegNum >= 8) {
2221 debug(
"Invalid mask register number");
2241 debug(
"Unhandled operand encoding during translation");
2246 case ENCODING_WRITEMASK:
2303 debug(
"Instruction has no specification");
2313 if(mcInst.
getOpcode() == X86::REP_PREFIX)
2315 else if(mcInst.
getOpcode() == X86::REPNE_PREFIX)
2321 for (
const auto &Op : insn.
operands) {
2322 if (Op.encoding != ENCODING_NONE) {
2335 std::unique_ptr<const MCInstrInfo> MII(
T.createMCInstrInfo());
2336 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
#define LLVM_EXTERNAL_VISIBILITY
static bool isBranch(unsigned Opcode)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define THREEDNOW_MAP_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define vvvvFromVEX2of2(vex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define vvvvFromXOP3of3(vex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define wFromXOP3of3(xop)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool readOpcode(struct InternalInstruction *insn)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
static int readOperands(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static bool is64Bit(const char *name)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
static int readSIB(struct InternalInstruction *insn)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
static int readVVVV(struct InternalInstruction *insn)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
static int readMaskRegister(struct InternalInstruction *insn)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static int readDisplacement(struct InternalInstruction *insn)
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
static int readModRM(struct InternalInstruction *insn)
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
static int readPrefixes(struct InternalInstruction *insn)
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Container class for subtarget features.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void setFlags(unsigned F)
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(unsigned Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
StringRef - Represent a constant reference to a string, i.e.
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ X86
Windows x64, Windows Itanium (IA-64)
EABase
All possible values of the base field for effective-address computations, a.k.a.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
OpcodeDecision opcodeDecisions[IC_max]
ModRMDecision modRMDecisions[256]
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.