LLVM 22.0.0git
AArch64ExternalSymbolizer.cpp
Go to the documentation of this file.
1//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/MC/MCContext.h"
12#include "llvm/MC/MCExpr.h"
13#include "llvm/MC/MCInst.h"
15#include "llvm/Support/Format.h"
17
18using namespace llvm;
19
20#define DEBUG_TYPE "aarch64-disassembler"
21
43
44/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
45/// operand in place of the immediate Value in the MCInst. The immediate
46/// Value has not had any PC adjustment made by the caller. If the instruction
47/// is a branch that adds the PC to the immediate Value then IsBranch is
48/// true, else false. If GetOpInfo is non-null, then it is called to get any
49/// symbolic information at the Address for this instruction. If that returns
50/// non-zero then the symbolic information it returns is used to create an
51/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
52/// returns zero and IsBranch is true then a symbol look up for
53/// Address + Value is done and if a symbol is found an MCExpr is created with
54/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
55/// returns zero and IsBranch is false then the Opcode of the MCInst is
56/// tested and for ADRP and other instructions that help to load pointers
57/// a symbol look up is done to see if it returns a specific reference type
58/// to add to the comment stream. This function returns true if it adds
59/// an operand to the MCInst and false otherwise.
61 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
62 bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
// Nothing is symbolized unless a symbol lookup callback is installed; this
// early return is taken before GetOpInfo is consulted.
63 if (!SymbolLookUp)
64 return false;
65 // FIXME: This method shares a lot of code with
66 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible to
67 // refactor the MCExternalSymbolizer interface to allow more of this
68 // implementation to be shared.
69 //
// Start from a zeroed operand description whose Value is the raw immediate.
70 struct LLVMOpInfo1 SymbolicOp;
71 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
72 SymbolicOp.Value = Value;
74 const char *ReferenceName;
// The opcode-based fallback below runs when there is no GetOpInfo callback or
// when the callback returns zero. The callback is handed a literal 0 for its
// offset argument rather than the Offset parameter.
75 if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
76 1, &SymbolicOp)) {
77 if (IsBranch) {
// Branch: try to resolve the target, Address + Value, to a symbol name.
79 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
80 Address, &ReferenceName);
81 if (Name) {
// Symbol found: the operand becomes that symbol with no constant addend.
82 SymbolicOp.AddSymbol.Name = Name;
83 SymbolicOp.AddSymbol.Present = true;
84 SymbolicOp.Value = 0;
85 } else {
// No symbol: the operand becomes the target address as a plain constant.
86 SymbolicOp.Value = Address + Value;
87 }
// NOTE(review): the ReferenceType conditions that select between the two
// comment strings below are not fully visible in this listing.
89 CommentStream << "symbol stub for: " << ReferenceName;
90 else if (ReferenceType ==
92 CommentStream << "Objc message: " << ReferenceName;
93 } else if (MI.getOpcode() == AArch64::ADRP) {
95 // otool expects the fully encoded ADRP instruction to be passed in as
96 // the value here, so reconstruct it:
97 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
// 0x90000000 is the starting bit pattern; the two immediate fields and the
// register encoding are OR-ed into it below.
98 uint32_t EncodedInst = 0x90000000;
99 EncodedInst |= (Value & 0x3) << 29; // immlo
100 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
101 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
103 &ReferenceName);
// Emit the computed target as a comment: Address with its low 12 bits
// cleared, plus Value * 0x1000.
104 CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
105 Value * 0x1000);
106 } else if (MI.getOpcode() == AArch64::ADDXri ||
107 MI.getOpcode() == AArch64::ADDWri ||
108 MI.getOpcode() == AArch64::LDRXui ||
109 MI.getOpcode() == AArch64::LDRWui ||
110 MI.getOpcode() == AArch64::LDRXl ||
111 MI.getOpcode() == AArch64::LDRWl ||
112 MI.getOpcode() == AArch64::ADR) {
// NOTE(review): the statements controlled by the next two conditions are not
// visible in this listing.
113 if (MI.getOpcode() == AArch64::ADDXri ||
114 MI.getOpcode() == AArch64::ADDWri)
116 else if (MI.getOpcode() == AArch64::LDRXui ||
117 MI.getOpcode() == AArch64::LDRWui)
// Three cases follow: the literal loads (LDRXl/LDRWl), ADR, and the remaining
// ADD/LDR immediate forms, which need the instruction word rebuilt first.
119 if (MI.getOpcode() == AArch64::LDRXl ||
120 MI.getOpcode() == AArch64::LDRWl) {
123 &ReferenceName);
124 } else if (MI.getOpcode() == AArch64::ADR) {
127 &ReferenceName);
128 } else {
129 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
130 // otool expects the fully encoded ADD/LDR instruction to be passed in
131 // as the value here, so reconstruct it:
132 unsigned EncodedInst;
// Pick the starting bit pattern for the opcode; the immediate and the two
// register encodings are OR-ed in after the switch.
133 switch (MI.getOpcode()) {
134 case AArch64::ADDXri:
135 EncodedInst = 0x91000000;
136 break;
137 case AArch64::ADDWri:
138 EncodedInst = 0x11000000;
139 break;
140 case AArch64::LDRXui:
141 EncodedInst = 0xF9400000;
142 break;
143 default: // LDRWui
144 EncodedInst = 0xB9400000;
145 break;
146 }
147 EncodedInst |= Value << 10; // imm12 (ADD: imm+shift, LDR: offset)
148 EncodedInst |=
149 MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
150 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
151
153 &ReferenceName);
154 }
// Annotate the instruction according to the reference type reported by the
// lookup. NOTE(review): most of the ReferenceType comparisons in this chain
// are truncated in this listing.
156 CommentStream << "literal pool symbol address: " << ReferenceName;
157 else if (ReferenceType ==
// write_escaped is used so the referenced string is printed with escapes.
159 CommentStream << "literal pool for: \"";
160 CommentStream.write_escaped(ReferenceName);
161 CommentStream << "\"";
162 } else if (ReferenceType ==
164 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
165 else if (ReferenceType ==
167 CommentStream << "Objc message: " << ReferenceName;
168 else if (ReferenceType ==
170 CommentStream << "Objc message ref: " << ReferenceName;
171 else if (ReferenceType ==
173 CommentStream << "Objc selector ref: " << ReferenceName;
174 else if (ReferenceType ==
176 CommentStream << "Objc class ref: " << ReferenceName;
177 // For these instructions, the SymbolLookUp() above is only used to get
178 // the ReferenceType and ReferenceName. Return here instead of falling
179 // through, so that no MCExpr is built and the disassembly of the
180 // immediate values of these instructions is left to the InstPrinter.
181 return false;
182 } else {
// Any other non-branch opcode is left unsymbolized.
183 return false;
184 }
185 }
186
// From here on an operand expression is built from SymbolicOp, which was
// filled in either by GetOpInfo or by the branch handling above.
// NOTE(review): the statements that assign Add and Sub inside the next two
// blocks are not visible in this listing.
187 const MCExpr *Add = nullptr;
188 if (SymbolicOp.AddSymbol.Present) {
189 if (SymbolicOp.AddSymbol.Name) {
190 StringRef Name(SymbolicOp.AddSymbol.Name);
191 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
192 auto Spec = getMachOSpecifier(SymbolicOp.VariantKind);
193 if (Spec != AArch64::S_None)
195 else
197 } else {
199 }
200 }
201
202 const MCExpr *Sub = nullptr;
203 if (SymbolicOp.SubtractSymbol.Present) {
204 if (SymbolicOp.SubtractSymbol.Name) {
205 StringRef Name(SymbolicOp.SubtractSymbol.Name);
206 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
208 } else {
210 }
211 }
212
// A zero Value contributes no constant term.
213 const MCExpr *Off = nullptr;
214 if (SymbolicOp.Value != 0)
215 Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
216
// Assemble the final expression from whichever of Sub, Add and Off exist; if
// none of them does, the operand is the constant 0.
// NOTE(review): the two statements that assign LHS in the Sub case are not
// visible in this listing.
217 const MCExpr *Expr;
218 if (Sub) {
219 const MCExpr *LHS;
220 if (Add)
222 else
224 if (Off)
225 Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
226 else
227 Expr = LHS;
228 } else if (Add) {
229 if (Off)
230 Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
231 else
232 Expr = Add;
233 } else {
234 if (Off)
235 Expr = Off;
236 else
237 Expr = MCConstantExpr::create(0, Ctx);
238 }
239
// The expression is appended to MI as a new operand, and true reports that an
// operand was added.
240 MI.addOperand(MCOperand::createExpr(Expr));
241
242 return true;
243}
static AArch64::Specifier getMachOSpecifier(uint64_t LLVMDisassembler_VariantKind)
IRTranslator LLVM IR MI
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic operand in place of the i...
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
void * DisInfo
The pointer to the block of symbolic information for above call back.
LLVMSymbolLookupCallback SymbolLookUp
The function to lookup a symbol name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
uint16_t getEncodingValue(MCRegister Reg) const
Returns the encoding for Reg.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
static const MCUnaryExpr * createMinus(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:269
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
raw_ostream & write_escaped(StringRef Str, bool UseHexEscapes=false)
Output Str, turning '\', '\t', ' ', '"', and anything that doesn't satisfy llvm::isPrint into an esca...
#define LLVMDisassembler_VariantKind_ARM64_TLVOFF
#define LLVMDisassembler_ReferenceType_In_ARM64_ADR
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF
#define LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref
#define LLVMDisassembler_VariantKind_ARM64_TLVP
#define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri
#define LLVMDisassembler_ReferenceType_Out_SymbolStub
#define LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref
#define LLVMDisassembler_ReferenceType_In_ARM64_ADRP
#define LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr
#define LLVMDisassembler_ReferenceType_In_Branch
#define LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr
#define LLVMDisassembler_ReferenceType_Out_Objc_Message
#define LLVMDisassembler_VariantKind_ARM64_PAGE
The ARM64 target VariantKinds.
#define LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref
#define LLVMDisassembler_VariantKind_ARM64_PAGEOFF
#define LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref
#define LLVMDisassembler_VariantKind_ARM64_GOTPAGE
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl
#define LLVMDisassembler_VariantKind_None
The operand VariantKinds for symbolic disassembly.
#define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
struct LLVMOpInfoSymbol1 SubtractSymbol
struct LLVMOpInfoSymbol1 AddSymbol
uint64_t VariantKind