Go to the documentation of this file.
94 #define DEBUG_TYPE "x86-disassembler"
96 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
121 #include "X86GenDisassemblerTables.inc"
124 uint8_t opcode, uint8_t modRM) {
129 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
132 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
135 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
138 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
141 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
144 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
147 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
154 dec = &
MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
157 dec = &
MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
173 return modRMTable[dec->
instructionIDs + ((modRM & 0x38) >> 3) + 8];
175 case MODRM_SPLITMISC:
188 byte = insn->
bytes[offset];
193 auto r = insn->
bytes;
195 if (offset +
sizeof(
T) > r.size())
198 for (
unsigned i = 0;
i <
sizeof(
T); ++
i)
206 return insn->
mode ==
MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
232 if ((
byte == 0xf2 ||
byte == 0xf3) && !
peek(insn,
nextByte)) {
241 if (!(
byte == 0xf3 &&
nextByte == 0x90))
259 if (
peek(insn, nnextByte))
329 uint8_t byte1, byte2;
335 if (
peek(insn, byte2)) {
341 ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
371 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
375 }
else if (
byte == 0xc4) {
377 if (
peek(insn, byte1)) {
406 }
else if (
byte == 0xc5) {
408 if (
peek(insn, byte1)) {
438 }
else if (
byte == 0x8f) {
440 if (
peek(insn, byte1)) {
445 if ((byte1 & 0x38) != 0x0)
477 }
else if (
isREX(insn,
byte)) {
524 sibBaseBase = SIB_BASE_EAX;
528 sibBaseBase = SIB_BASE_RAX;
607 uint8_t mod, rm, reg, evexrm;
652 EABase eaBaseBase = EA_BASE_BX_SI;
721 insn->
eaBase = EA_BASE_sib;
744 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
745 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
746 uint8_t index, uint8_t *valid) { \
750 debug("Unhandled register type"); \
754 return base + index; \
759 if (insn->rexPrefix && index >= 4 && index <= 7) { \
760 return prefix##_SPL + (index - 4); \
762 return prefix##_AL + index; \
768 return prefix##_AX + index; \
773 return prefix##_EAX + index; \
778 return prefix##_RAX + index; \
780 return prefix##_ZMM0 + index; \
782 return prefix##_YMM0 + index; \
784 return prefix##_XMM0 + index; \
788 return prefix##_TMM0 + index; \
793 return prefix##_K0 + index; \
797 return prefix##_K0_K1 + (index / 2); \
799 return prefix##_MM0 + (index & 0x7); \
800 case TYPE_SEGMENTREG: \
801 if ((index & 7) > 5) \
803 return prefix##_ES + (index & 7); \
804 case TYPE_DEBUGREG: \
805 return prefix##_DR0 + index; \
806 case TYPE_CONTROLREG: \
807 return prefix##_CR0 + index; \
809 return prefix##_XMM0 + index; \
811 return prefix##_YMM0 + index; \
813 return prefix##_ZMM0 + index; \
844 debug(
"Expected a REG or R/M encoding in fixupReg");
854 insn->reg - insn->regBase, &valid);
860 if (insn->eaBase >= insn->eaRegBase) {
861 insn->eaBase = (
EABase)fixupRMValue(
862 insn, (
OperandType)
op->type, insn->eaBase - insn->eaRegBase, &valid);
883 dbgs() <<
format(
"Unhandled mmm field for instruction (0x%hhx)",
906 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
932 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
950 if (current == 0x0f) {
952 dbgs() <<
format(
"Found a two-byte escape prefix (0x%hhx)", current));
956 if (current == 0x38) {
963 }
else if (current == 0x3a) {
970 }
else if (current == 0x0f) {
972 dbgs() <<
format(
"Found a 3dnow escape prefix (0x%hhx)", current));
1000 for (
int i = 0;;
i++) {
1001 if (orig[
i] ==
'\0' && equiv[
i] ==
'\0')
1003 if (orig[
i] ==
'\0' || equiv[
i] ==
'\0')
1005 if (orig[
i] != equiv[
i]) {
1006 if ((orig[
i] ==
'Q' || orig[
i] ==
'L') && equiv[
i] ==
'W')
1008 if ((orig[
i] ==
'6' || orig[
i] ==
'3') && equiv[
i] ==
'1')
1010 if ((orig[
i] ==
'4' || orig[
i] ==
'2') && equiv[
i] ==
'6')
1019 for (
int i = 0;; ++
i) {
1020 if (
name[
i] ==
'\0')
1255 auto SpecName = mii->
getName(instructionIDWithREXW);
1257 if (!
is64Bit(SpecName.data())) {
1318 specName = mii->
getName(instructionID);
1319 specWithOpSizeName = mii->
getName(instructionIDWithOpsize);
1337 uint16_t instructionIDWithNewOpcode;
1360 insn->
spec = specWithNewOpcode;
1402 (
Reg)(MODRM_REG_EAX +
1407 (
Reg)(MODRM_REG_RAX +
1485 insn->
vvvv =
static_cast<Reg>(vvvv);
1507 int hasVVVV, needVVVV;
1514 needVVVV = hasVVVV && (insn->
vvvv != 0);
1517 switch (
Op.encoding) {
1525 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1530 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1545 debug(
"Unhandled VSIB index type");
1567 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1595 if (
Op.type == TYPE_XMM ||
Op.type == TYPE_YMM)
1656 case ENCODING_WRITEMASK:
1663 LLVM_DEBUG(
dbgs() <<
"Encountered an operand with an unknown encoding.");
1703 std::unique_ptr<const MCInstrInfo> MII;
1706 std::unique_ptr<const MCInstrInfo> MII);
1718 X86GenericDisassembler::X86GenericDisassembler(
1721 std::unique_ptr<const MCInstrInfo> MII)
1724 if (FB[X86::Is16Bit]) {
1727 }
else if (FB[X86::Is32Bit]) {
1730 }
else if (FB[X86::Is64Bit]) {
1741 CommentStream = &CStream;
1757 Insn.operands = x86OperandSets[
Insn.spec->operands];
1768 if (!
Insn.mandatoryPrefix) {
1771 if (
Insn.repeatPrefix == 0xf2)
1773 else if (
Insn.repeatPrefix == 0xf3 &&
1775 Insn.opcode != 0x90)
1777 if (
Insn.hasLockPrefix)
1795 #define ENTRY(x) X86::x,
1799 MCPhysReg llvmRegnum = llvmRegnums[reg];
1885 if(immediate & 0x80)
1886 immediate |= ~(0xffull);
1889 if(immediate & 0x8000)
1890 immediate |= ~(0xffffull);
1893 if(immediate & 0x80000000)
1894 immediate |= ~(0xffffffffull);
1901 if(immediate & 0x80)
1902 immediate |= ~(0xffull);
1905 if(immediate & 0x8000)
1906 immediate |= ~(0xffffull);
1909 if(immediate & 0x80000000)
1910 immediate |= ~(0xffffffffull);
1915 else if (
type == TYPE_IMM) {
1920 if(immediate & 0x80)
1921 immediate |= ~(0xffull);
1924 if(immediate & 0x8000)
1925 immediate |= ~(0xffffull);
1928 if(immediate & 0x80000000)
1929 immediate |= ~(0xffffffffull);
1956 if (
type == TYPE_MOFFS) {
1971 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
1972 debug(
"A R/M register operand may not have a SIB byte");
1978 debug(
"Unexpected EA base register");
1981 debug(
"EA_BASE_NONE for ModR/M base");
1983 #define ENTRY(x) case EA_BASE_##x:
1986 debug(
"A R/M register operand may not have a base; "
1987 "the operand must be a register.");
1991 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2010 bool ForceSIB =
false) {
2030 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2034 debug(
"Unexpected sibBase");
2037 case SIB_BASE_##x: \
2038 baseReg = MCOperand::createReg(X86::x); break;
2049 debug(
"Unexpected sibIndex");
2052 case SIB_INDEX_##x: \
2053 indexReg = MCOperand::createReg(X86::x); break;
2074 insn.
sibBase != SIB_BASE_R12D && insn.
sibBase != SIB_BASE_R12))) {
2086 debug(
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2123 debug(
"Unexpected eaBase");
2131 baseReg = MCOperand::createReg(X86::x); break;
2134 #define ENTRY(x) case EA_REG_##x:
2137 debug(
"A R/M memory operand may not be a register; "
2138 "the base field must be a base.");
2154 const uint8_t dispSize =
2175 switch (operand.
type) {
2177 debug(
"Unexpected type for a R/M operand");
2192 case TYPE_CONTROLREG:
2222 uint8_t maskRegNum) {
2223 if (maskRegNum >= 8) {
2224 debug(
"Invalid mask register number");
2244 debug(
"Unhandled operand encoding during translation");
2249 case ENCODING_WRITEMASK:
2306 debug(
"Instruction has no specification");
2316 if(mcInst.
getOpcode() == X86::REP_PREFIX)
2318 else if(mcInst.
getOpcode() == X86::REPNE_PREFIX)
2325 if (
Op.encoding != ENCODING_NONE) {
2338 std::unique_ptr<const MCInstrInfo> MII(
T.createMCInstrInfo());
2339 return new X86GenericDisassembler(STI, Ctx,
std::move(MII));
SSE Variable shift can be custom lowered to something like which uses a small table unaligned load shuffle instead of going through memory byte
#define vvvvFromVEX3of3(vex)
VectorExtensionType vectorExtensionType
#define bFromXOP2of3(xop)
#define xFromXOP2of3(xop)
This is an optimization pass for GlobalISel generic memory operations.
#define THREEDNOW_MAP_SYM
#define lFromVEX2of2(vex)
#define wFromVEX3of3(vex)
#define rFromEVEX2of4(evex)
ModRMDecision modRMDecisions[256]
#define ppFromXOP3of3(xop)
static MCOperand createImm(int64_t Val)
Context object for machine code objects.
Target & getTheX86_64Target()
static bool is64Bit(const char *name)
Target - Wrapper for Target specific information.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
#define mmmmmFromVEX2of3(vex)
Reg
All possible values of the reg field in the ModR/M byte.
#define ppFromVEX2of2(vex)
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
SegmentOverride segmentOverride
#define modFromModRM(modRM)
#define scaleFromSIB(sib)
static int readModRM(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
#define rFromXOP2of3(xop)
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Container class for subtarget features.
#define rFromVEX2of3(vex)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
#define vvvvFromVEX2of2(vex)
#define mmmFromEVEX2of4(evex)
Instances of this class represent a single low-level machine instruction.
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
#define rFromVEX2of2(vex)
uint8_t numImmediatesTranslated
bool empty() const
empty - Check if the array is empty.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
void setOpcode(unsigned Op)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
#define r2FromEVEX2of4(evex)
static int readSIB(struct InternalInstruction *insn)
#define lFromVEX3of3(vex)
#define wFromEVEX3of4(evex)
The x86 internal instruction, which is produced by the decoder.
#define vvvvFromEVEX3of4(evex)
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
const FeatureBitset & getFeatureBits() const
static int readOperands(struct InternalInstruction *insn)
#define ppFromVEX3of3(vex)
This class implements an extremely fast bulk output stream that can only output to a stream.
void setFlags(unsigned F)
SIBIndex
All possible values of the SIB index field.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
#define bFromVEX2of3(vex)
DecodeStatus
Ternary decode status.
void addOperand(const MCOperand Op)
SIBBase
All possible values of the SIB base field.
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)
static int readMaskRegister(struct InternalInstruction *insn)
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
Target & getTheX86_32Target()
#define lFromXOP3of3(xop)
#define regFromModRM(modRM)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler()
llvm::ArrayRef< uint8_t > bytes
#define LLVM_EXTERNAL_VISIBILITY
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Superclass for all disassemblers.
#define vvvvFromXOP3of3(vex)
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
#define mmmmmFromXOP2of3(xop)
EABase
All possible values of the base field for effective-address computations, a.k.a.
OperandType
Operands are tagged with one of the values of this enum.
#define ppFromEVEX3of4(evex)
#define rmFromModRM(modRM)
const InstructionSpecifier * spec
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
StandardInstrumentations SI(Debug, VerifyEach)
therefore end up llgh r3 lr r0 br r14 but truncating the load would lh r3 br r14 Functions ret i64 and ought to be implemented ngr r0 br r14 but two address optimizations reverse the order of the AND and ngr r2 lgr r0 br r14 CodeGen SystemZ and ll has several examples of this Out of range displacements are usually handled by loading the full address into a register In many cases it would be better to create an anchor point instead E g i64 base
static bool isBranch(unsigned Opcode)
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
uint8_t numImmediatesConsumed
#define zFromEVEX4of4(evex)
ArrayRef< OperandSpecifier > operands
uint8_t displacementOffset
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
StringRef - Represent a constant reference to a string, i.e.
#define bFromEVEX2of4(evex)
static int readVVVV(struct InternalInstruction *insn)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static MCOperand createReg(unsigned Reg)
if(llvm_vc STREQUAL "") set(fake_version_inc "$
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
The specification for how to extract and interpret one operand.
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
EADisplacement eaDisplacement
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
OpcodeDecision opcodeDecisions[IC_max]
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
static bool readOpcode(struct InternalInstruction *insn)
#define v2FromEVEX4of4(evex)
#define aaaFromEVEX4of4(evex)
static int readDisplacement(struct InternalInstruction *insn)
Interface to description of machine instruction set.
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
#define lFromEVEX4of4(evex)
unsigned getOpcode() const
#define wFromXOP3of3(xop)
DisassemblerMode
Decoding mode for the Intel disassembler.
#define l2FromEVEX4of4(evex)
size_t size() const
size - Get the array size.
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool consume(InternalInstruction *insn, T &ptr)
#define bFromEVEX4of4(evex)
Instances of this class represent operands of the MCInst class.
#define CASE_ENCODING_VSIB
#define xFromEVEX2of4(evex)
#define xFromVEX2of3(vex)
The specification for how to extract and interpret a full instruction and its operands.
static int readPrefixes(struct InternalInstruction *insn)
#define indexFromSIB(sib)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Unrolling by would eliminate the &in both leading to a net reduction in code size The resultant code would then also be suitable for exit value computation We miss a bunch of rotate opportunities on various including etc On X86
Generic base class for all target subtargets.
uint8_t vectorExtensionPrefix[4]