LLVM  13.0.0git
MCDisassembler.h
Go to the documentation of this file.
1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11 
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/StringRef.h"
16 #include <cstdint>
17 #include <memory>
18 #include <vector>
19 
20 namespace llvm {
21 
25  bool IsLabel;
27  Optional<uint32_t> Idx, bool Label)
28  : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {}
29 
30  bool operator<(const XCOFFSymbolInfo &SymInfo) const;
31 };
32 
33 struct SymbolInfoTy {
34  uint64_t Addr;
36  union {
37  uint8_t Type;
39  };
40 
41 private:
42  bool IsXCOFF;
43 
44 public:
47  bool Label)
48  : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {}
49  SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
50  : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {}
51  bool isXCOFF() const { return IsXCOFF; }
52 
53 private:
/// Less-than comparison for SymbolInfoTy.
///
/// Both operands must agree on IsXCOFF; this is checked with an assert.
/// XCOFF symbols are compared lexicographically by
/// (Addr, XCOFFSymInfo, Name); all other symbols are compared by
/// (Addr, Name, Type).
54  friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
55  assert(P1.IsXCOFF == P2.IsXCOFF &&
56  "P1.IsXCOFF should be equal to P2.IsXCOFF.");
// XCOFF flavour: the XCOFF-specific info takes part in the ordering and Type
// is not read.
57  if (P1.IsXCOFF)
58  return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
59  std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
60 
// Non-XCOFF flavour: Type is the final tie-breaker after Addr and Name.
61  return std::tie(P1.Addr, P1.Name, P1.Type) <
62  std::tie(P2.Addr, P2.Name, P2.Type);
63  }
64 };
65 
66 using SectionSymbolsTy = std::vector<SymbolInfoTy>;
67 
68 template <typename T> class ArrayRef;
69 class MCContext;
70 class MCInst;
71 class MCSubtargetInfo;
72 class raw_ostream;
73 
74 /// Superclass for all disassemblers. Consumes a memory region and provides an
75 /// array of assembly instructions.
77 public:
78  /// Ternary decode status. Most backends will just use Fail and
79  /// Success, however some have a concept of an instruction with
80  /// understandable semantics but which is architecturally
81  /// incorrect. An example of this is ARM UNPREDICTABLE instructions
82  /// which are disassemblable but cause undefined behaviour.
83  ///
84  /// Because it makes sense to disassemble these instructions, there
85  /// is a "soft fail" failure mode that indicates the MCInst& is
86  /// valid but architecturally incorrect.
87  ///
88  /// The enum numbers are deliberately chosen such that reduction
89  /// from Success -> SoftFail -> Fail can be done with a simple
90  /// bitwise-AND:
91  ///
92  ///   LEFT & TOP =  | Success       Unpredictable   Fail
93  ///   --------------+-----------------------------------
94  ///   Success       | Success       Unpredictable   Fail
95  ///   Unpredictable | Unpredictable Unpredictable   Fail
96  ///   Fail          | Fail          Fail            Fail
97  ///
98  /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
99  /// Success, SoftFail, Fail respectively.
101  Fail = 0,
102  SoftFail = 1,
104  };
105 
107  : Ctx(Ctx), STI(STI) {}
108 
109  virtual ~MCDisassembler();
110 
111  /// Returns the disassembly of a single instruction.
112  ///
113  /// \param Instr - An MCInst to populate with the contents of the
114  /// instruction.
115  /// \param Size - A value to populate with the size of the instruction, or
116  /// the number of bytes consumed while attempting to decode
117  /// an invalid instruction.
118  /// \param Address - The address, in the memory space of region, of the first
119  /// byte of the instruction.
120  /// \param Bytes - A reference to the actual bytes of the instruction.
121  /// \param CStream - The stream to print comments and annotations on.
122  /// \return - MCDisassembler::Success if the instruction is valid,
123  /// MCDisassembler::SoftFail if the instruction was
124  /// disassemblable but invalid,
125  /// MCDisassembler::Fail if the instruction was invalid.
126  virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
127  ArrayRef<uint8_t> Bytes, uint64_t Address,
128  raw_ostream &CStream) const = 0;
129 
130  /// Used to perform separate target specific disassembly for a particular
131  /// symbol. May parse any prelude that precedes instructions after the
132  /// start of a symbol, or the entire symbol.
133  /// This is used for example by WebAssembly to decode preludes.
134  ///
135  /// The base implementation returns None, so by default no target treats
136  /// symbols separately.
137  ///
138  /// \param Symbol - The symbol.
139  /// \param Size - The number of bytes consumed.
140  /// \param Address - The address, in the memory space of region, of the first
141  /// byte of the symbol.
142  /// \param Bytes - A reference to the actual bytes at the symbol location.
143  /// \param CStream - The stream to print comments and annotations on.
144  /// \return - MCDisassembler::Success if bytes are decoded
145  /// successfully. Size must hold the number of bytes that
146  /// were decoded.
147  /// - MCDisassembler::Fail if the bytes are invalid. Size
148  /// must hold the number of bytes that were decoded before
149  /// failing. The target must print nothing. This can be
150  /// done by buffering the output if needed.
151  /// - None if the target doesn't want to handle the symbol
152  /// separately. Value of Size is ignored in this case.
153  virtual Optional<DecodeStatus>
155  uint64_t Address, raw_ostream &CStream) const;
156  // TODO:
157  // Implement similar hooks that can be used at other points during
158  // disassembly. Something along the following lines:
159  // - onBeforeInstructionDecode()
160  // - onAfterInstructionDecode()
161  // - onSymbolEnd()
162  // It should help move much of the target specific code from llvm-objdump to
163  // respective target disassemblers.
164 
165 private:
166  MCContext &Ctx;
167 
168 protected:
169  // Subtarget information, for instruction decoding predicates if required.
171  std::unique_ptr<MCSymbolizer> Symbolizer;
172 
173 public:
174  // Helpers around MCSymbolizer
175  bool tryAddingSymbolicOperand(MCInst &Inst,
176  int64_t Value,
177  uint64_t Address, bool IsBranch,
178  uint64_t Offset, uint64_t InstSize) const;
179 
180  void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
181 
182  /// Set \p Symzer as the current symbolizer.
183  /// This takes ownership of \p Symzer, and deletes the previously set one.
184  void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
185 
/// Returns the MCContext reference held in Ctx, which the constructor
/// initializes from its Ctx argument.
186  MCContext& getContext() const { return Ctx; }
187 
/// Returns the subtarget information held in STI, which the constructor
/// initializes from its STI argument.
188  const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
189 
190  // Marked mutable because we cache it inside the disassembler, rather than
191  // having to pass it around as an argument through all the autogenerated code.
192  mutable raw_ostream *CommentStream = nullptr;
193 };
194 
195 } // end namespace llvm
196 
197 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::XCOFFSymbolInfo
Definition: MCDisassembler.h:22
llvm::XCOFFSymbolInfo::operator<
bool operator<(const XCOFFSymbolInfo &SymInfo) const
The function is for symbol sorting when symbols have the same address.
Definition: MCDisassembler.cpp:82
llvm::SymbolInfoTy::SymbolInfoTy
SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type)
Definition: MCDisassembler.h:49
llvm
Definition: AllocatorList.h:23
Optional.h
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:71
llvm::SymbolInfoTy
Definition: MCDisassembler.h:33
StringRef.h
llvm::MCDisassembler::onSymbolStart
virtual Optional< DecodeStatus > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const
Used to perform separate target specific disassembly for a particular symbol.
Definition: MCDisassembler.cpp:20
llvm::MCDisassembler::SoftFail
@ SoftFail
Definition: MCDisassembler.h:102
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::Optional< XCOFF::StorageMappingClass >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:183
llvm::XCOFFSymbolInfo::XCOFFSymbolInfo
XCOFFSymbolInfo(Optional< XCOFF::StorageMappingClass > Smc, Optional< uint32_t > Idx, bool Label)
Definition: MCDisassembler.h:26
llvm::XCOFFSymbolInfo::StorageMappingClass
Optional< XCOFF::StorageMappingClass > StorageMappingClass
Definition: MCDisassembler.h:23
XCOFF.h
llvm::SymbolInfoTy::Addr
uint64_t Addr
Definition: MCDisassembler.h:34
llvm::MCDisassembler::MCDisassembler
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
Definition: MCDisassembler.h:106
P2
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is bring in zeros the one element instead of elements This can be used to simplify a variety of shuffle where the elements are fixed zeros This code generates ugly probably due to costs being off or< 4 x float > * P2
Definition: README-SSE.txt:278
llvm::MCDisassembler::Success
@ Success
Definition: MCDisassembler.h:103
llvm::MCDisassembler::CommentStream
raw_ostream * CommentStream
Definition: MCDisassembler.h:192
false
Definition: StackSlotColoring.cpp:142
llvm::SymbolInfoTy::Name
StringRef Name
Definition: MCDisassembler.h:35
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::SectionSymbolsTy
std::vector< SymbolInfoTy > SectionSymbolsTy
Definition: MCDisassembler.h:66
llvm::MCDisassembler::getInstruction
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
llvm::MCDisassembler::DecodeStatus
DecodeStatus
Ternary decode status.
Definition: MCDisassembler.h:100
llvm::MCDisassembler::setSymbolizer
void setSymbolizer(std::unique_ptr< MCSymbolizer > Symzer)
Set Symzer as the current symbolizer.
Definition: MCDisassembler.cpp:42
llvm::XCOFFSymbolInfo::Index
Optional< uint32_t > Index
Definition: MCDisassembler.h:24
llvm::MCDisassembler::STI
const MCSubtargetInfo & STI
Definition: MCDisassembler.h:170
llvm::XCOFF::StorageMappingClass
StorageMappingClass
Storage Mapping Class definitions.
Definition: XCOFF.h:38
llvm::MCDisassembler::~MCDisassembler
virtual ~MCDisassembler()
llvm::MCDisassembler::tryAddingPcLoadReferenceComment
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
Definition: MCDisassembler.cpp:36
llvm::MCDisassembler
Superclass for all disassemblers.
Definition: MCDisassembler.h:76
llvm::SymbolInfoTy::operator<
friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2)
Definition: MCDisassembler.h:54
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SymbolInfoTy::isXCOFF
bool isXCOFF() const
Definition: MCDisassembler.h:51
MCSymbolizer.h
llvm::MCDisassembler::tryAddingSymbolicOperand
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) const
Definition: MCDisassembler.cpp:26
DecodeStatus
MCDisassembler::DecodeStatus DecodeStatus
Definition: AArch64Disassembler.cpp:36
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MCDisassembler::Fail
@ Fail
Definition: MCDisassembler.h:101
llvm::MCDisassembler::getContext
MCContext & getContext() const
Definition: MCDisassembler.h:186
llvm::SymbolInfoTy::SymbolInfoTy
SymbolInfoTy(uint64_t Addr, StringRef Name, Optional< XCOFF::StorageMappingClass > Smc, Optional< uint32_t > Idx, bool Label)
Definition: MCDisassembler.h:45
llvm::MCDisassembler::getSubtargetInfo
const MCSubtargetInfo & getSubtargetInfo() const
Definition: MCDisassembler.h:188
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:79
llvm::XCOFFSymbolInfo::IsLabel
bool IsLabel
Definition: MCDisassembler.h:25
SymInfo
SymInfo contains information about a symbol: its address and section index, which is -1LL for absolute ...
Definition: DWARFContext.cpp:1422
llvm::SymbolInfoTy::Type
uint8_t Type
Definition: MCDisassembler.h:37
llvm::SymbolInfoTy::XCOFFSymInfo
XCOFFSymbolInfo XCOFFSymInfo
Definition: MCDisassembler.h:38
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1797
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MCDisassembler::Symbolizer
std::unique_ptr< MCSymbolizer > Symbolizer
Definition: MCDisassembler.h:171