LLVM 17.0.0git
MCDisassembler.h
Go to the documentation of this file.
1//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11
12#include "llvm/ADT/StringRef.h"
15#include <cstdint>
16#include <memory>
17#include <vector>
18
19namespace llvm {
20
22 std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
23 std::optional<uint32_t> Index;
24 bool IsLabel = false;
25 bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
26};
27
31 // XCOFF uses XCOFFSymInfo. Other targets use Type.
33 uint8_t Type;
34
35private:
36 bool IsXCOFF;
37 bool HasType;
38
39public:
41 std::optional<XCOFF::StorageMappingClass> Smc,
42 std::optional<uint32_t> Idx, bool Label)
43 : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
44 IsXCOFF(true), HasType(false) {}
46 bool IsXCOFF = false)
47 : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
// Reports the IsXCOFF flag, which is set once by whichever constructor built
// this object and is not modified by any code visible in this listing.
48 bool isXCOFF() const { return IsXCOFF; }
49
50private:
// Less-than comparison for two SymbolInfoTy objects. Both operands are
// expected to carry the same IsXCOFF and HasType flags (checked by the assert
// below); P1's flags then select which fields make up the comparison key.
// Each key is compared lexicographically via std::tie.
51 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
52 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
53 "The value of IsXCOFF and HasType in P1 and P2 should be the same "
54 "respectively.");
55
// XCOFF symbol that was constructed with a Type: key is (Addr, Type, Name).
56 if (P1.IsXCOFF && P1.HasType)
57 return std::tie(P1.Addr, P1.Type, P1.Name) <
58 std::tie(P2.Addr, P2.Type, P2.Name);
59
// XCOFF symbol without a Type: key is (Addr, XCOFFSymInfo, Name), which
// relies on XCOFFSymbolInfoTy's own operator<.
60 if (P1.IsXCOFF)
61 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
62 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
63
// Non-XCOFF symbol: key is (Addr, Name, Type). Note that Name precedes Type
// here, unlike the XCOFF-with-Type case above.
64 return std::tie(P1.Addr, P1.Name, P1.Type) <
65 std::tie(P2.Addr, P2.Name, P2.Type);
66 }
67};
68
69using SectionSymbolsTy = std::vector<SymbolInfoTy>;
70
71template <typename T> class ArrayRef;
72class MCContext;
73class MCInst;
74class MCSubtargetInfo;
75class raw_ostream;
76
77/// Superclass for all disassemblers. Consumes a memory region and provides an
78/// array of assembly instructions.
80public:
81 /// Ternary decode status. Most backends will just use Fail and
82 /// Success, however some have a concept of an instruction with
83 /// understandable semantics but which is architecturally
84 /// incorrect. An example of this is ARM UNPREDICTABLE instructions
85 /// which are disassemblable but cause undefined behaviour.
86 ///
87 /// Because it makes sense to disassemble these instructions, there
88 /// is a "soft fail" failure mode that indicates the MCInst& is
89 /// valid but architecturally incorrect.
90 ///
91 /// The enum numbers are deliberately chosen such that reduction
92 /// from Success -> SoftFail -> Fail can be done with a simple
93 /// bitwise-AND:
94 ///
95 /// LEFT & TOP = | Success Unpredictable Fail
96 /// --------------+-----------------------------------
97 /// Success | Success Unpredictable Fail
98 /// Unpredictable | Unpredictable Unpredictable Fail
99 /// Fail | Fail Fail Fail
100 ///
101 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
102 /// Success, SoftFail, Fail respectively.
104 Fail = 0,
106 Success = 3
107 };
108
110 : Ctx(Ctx), STI(STI) {}
111
113
114 /// Returns the disassembly of a single instruction.
115 ///
116 /// \param Instr - An MCInst to populate with the contents of the
117 /// instruction.
118 /// \param Size - A value to populate with the size of the instruction, or
119 /// the number of bytes consumed while attempting to decode
120 /// an invalid instruction.
121 /// \param Address - The address, in the memory space of region, of the first
122 /// byte of the instruction.
123 /// \param Bytes - A reference to the actual bytes of the instruction.
124 /// \param CStream - The stream to print comments and annotations on.
125 /// \return - MCDisassembler::Success if the instruction is valid,
126 /// MCDisassembler::SoftFail if the instruction was
127 /// disassemblable but invalid,
128 /// MCDisassembler::Fail if the instruction was invalid.
131 raw_ostream &CStream) const = 0;
132
133 /// Used to perform separate target specific disassembly for a particular
134 /// symbol. May parse any prelude that precedes instructions after the
135 /// start of a symbol, or the entire symbol.
136 /// This is used for example by WebAssembly to decode preludes.
137 ///
138 /// Base implementation returns std::nullopt, so by default no target
139 /// treats symbols separately.
140 ///
141 /// \param Symbol - The symbol.
142 /// \param Size - The number of bytes consumed.
143 /// \param Address - The address, in the memory space of region, of the first
144 /// byte of the symbol.
145 /// \param Bytes - A reference to the actual bytes at the symbol location.
146 /// \param CStream - The stream to print comments and annotations on.
147 /// \return - MCDisassembler::Success if bytes are decoded
148 /// successfully. Size must hold the number of bytes that
149 /// were decoded.
150 /// - MCDisassembler::Fail if the bytes are invalid. Size
151 /// must hold the number of bytes that were decoded before
152 /// failing. The target must print nothing. This can be
153 /// done by buffering the output if needed.
154 /// - std::nullopt if the target doesn't want to handle the
155 /// symbol separately. Value of Size is ignored in this
156 /// case.
157 virtual std::optional<DecodeStatus>
159 uint64_t Address, raw_ostream &CStream) const;
160 // TODO:
161 // Implement similar hooks that can be used at other points during
162 // disassembly. Something along the following lines:
163 // - onBeforeInstructionDecode()
164 // - onAfterInstructionDecode()
165 // - onSymbolEnd()
166 // It should help move much of the target specific code from llvm-objdump to
167 // respective target disassemblers.
168
169 /// Suggest a distance to skip in a buffer of data to find the next
170 /// place to look for the start of an instruction. For example, if
171 /// all instructions have a fixed alignment, this might advance to
172 /// the next multiple of that alignment.
173 ///
174 /// If not overridden, the default is 1.
175 ///
176 /// \param Address - The address, in the memory space of region, of the
177 /// starting point (typically the first byte of something
178 /// that did not decode as a valid instruction at all).
179 /// \param Bytes - A reference to the actual bytes at Address. May be
180 /// needed in order to determine the width of an
181 /// unrecognized instruction (e.g. in Thumb this is a simple
182 /// consistent criterion that doesn't require knowing the
183 /// specific instruction). The caller can pass as much data
184 /// as they have available, and the function is required to
185 /// make a reasonable default choice if not enough data is
186 /// available to make a better one.
187 /// \return - A number of bytes to skip. Must always be greater than
188 /// zero. May be greater than the size of Bytes.
190 uint64_t Address) const;
191
192private:
193 MCContext &Ctx;
194
195protected:
196 // Subtarget information, for instruction decoding predicates if required.
198 std::unique_ptr<MCSymbolizer> Symbolizer;
199
200public:
201 // Helpers around MCSymbolizer
203 bool IsBranch, uint64_t Offset, uint64_t OpSize,
204 uint64_t InstSize) const;
205
207
208 /// Set \p Symzer as the current symbolizer.
209 /// This takes ownership of \p Symzer, and deletes the previously set one.
210 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
211
// Accessor for the MCContext reference held in the private Ctx member, which
// the constructor's initializer list binds.
212 MCContext& getContext() const { return Ctx; }
213
// Accessor for the subtarget information held in the STI member.
214 const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
215
216 // Marked mutable because we cache it inside the disassembler, rather than
217 // having to pass it around as an argument through all the autogenerated code.
218 mutable raw_ostream *CommentStream = nullptr;
219};
220
221} // end namespace llvm
222
223#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
basic Basic Alias true
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Size
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Context object for machine code objects.
Definition: MCContext.h:76
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
virtual std::optional< DecodeStatus > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const
Used to perform separate target specific disassembly for a particular symbol.
MCContext & getContext() const
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
const MCSubtargetInfo & getSubtargetInfo() const
std::unique_ptr< MCSymbolizer > Symbolizer
const MCSubtargetInfo & STI
raw_ostream * CommentStream
void setSymbolizer(std::unique_ptr< MCSymbolizer > Symzer)
Set Symzer as the current symbolizer.
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
virtual uint64_t suggestBytesToSkip(ArrayRef< uint8_t > Bytes, uint64_t Address) const
Suggest a distance to skip in a buffer of data to find the next place to look for the start of an ins...
DecodeStatus
Ternary decode status.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
virtual ~MCDisassembler()
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Generic base class for all target subtargets.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
std::vector< SymbolInfoTy > SectionSymbolsTy
SymInfo contains information about a symbol: its address and section index, which is -1LL for absolute ...
XCOFFSymbolInfoTy XCOFFSymInfo
SymbolInfoTy(uint64_t Addr, StringRef Name, std::optional< XCOFF::StorageMappingClass > Smc, std::optional< uint32_t > Idx, bool Label)
SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, bool IsXCOFF=false)
friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2)
bool isXCOFF() const
bool operator<(const XCOFFSymbolInfoTy &SymInfo) const
The function is for symbol sorting when symbols have the same address.
std::optional< uint32_t > Index
std::optional< XCOFF::StorageMappingClass > StorageMappingClass