LLVM 17.0.0git
BPFAsmParser.cpp
Go to the documentation of this file.
1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/ADT/STLExtras.h"
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCInstrInfo.h"
21#include "llvm/MC/MCStreamer.h"
25
26using namespace llvm;
27
28namespace {
29struct BPFOperand;
30
31class BPFAsmParser : public MCTargetAsmParser {
32
33 SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34
35 bool PreMatchCheck(OperandVector &Operands);
36
37 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
40 bool MatchingInlineAsm) override;
41
42 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
43 SMLoc &EndLoc) override;
45 SMLoc &EndLoc) override;
46
48 SMLoc NameLoc, OperandVector &Operands) override;
49
50 bool ParseDirective(AsmToken DirectiveID) override;
51
52 // "=" is used as assignment operator for assembly statment, so can't be used
53 // for symbol assignment.
54 bool equalIsAsmAssignment() override { return false; }
55 // "*" is used for dereferencing memory that it will be the start of
56 // statement.
57 bool starIsStartOfStatement() override { return true; }
58
59#define GET_ASSEMBLER_HEADER
60#include "BPFGenAsmMatcher.inc"
61
64 OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
65
66public:
67 enum BPFMatchResultTy {
68 Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
69#define GET_OPERAND_DIAGNOSTIC_TYPES
70#include "BPFGenAsmMatcher.inc"
71#undef GET_OPERAND_DIAGNOSTIC_TYPES
72 };
73
74 BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
75 const MCInstrInfo &MII, const MCTargetOptions &Options)
76 : MCTargetAsmParser(Options, STI, MII) {
77 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
78 }
79};
80
81/// BPFOperand - Instances of this class represent a parsed machine
82/// instruction
83struct BPFOperand : public MCParsedAsmOperand {
84
85 enum KindTy {
86 Token,
88 Immediate,
89 } Kind;
90
91 struct RegOp {
92 unsigned RegNum;
93 };
94
95 struct ImmOp {
96 const MCExpr *Val;
97 };
98
99 SMLoc StartLoc, EndLoc;
100 union {
101 StringRef Tok;
102 RegOp Reg;
103 ImmOp Imm;
104 };
105
106 BPFOperand(KindTy K) : Kind(K) {}
107
108public:
109 BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
110 Kind = o.Kind;
111 StartLoc = o.StartLoc;
112 EndLoc = o.EndLoc;
113
114 switch (Kind) {
115 case Register:
116 Reg = o.Reg;
117 break;
118 case Immediate:
119 Imm = o.Imm;
120 break;
121 case Token:
122 Tok = o.Tok;
123 break;
124 }
125 }
126
127 bool isToken() const override { return Kind == Token; }
128 bool isReg() const override { return Kind == Register; }
129 bool isImm() const override { return Kind == Immediate; }
130 bool isMem() const override { return false; }
131
132 bool isConstantImm() const {
133 return isImm() && isa<MCConstantExpr>(getImm());
134 }
135
136 int64_t getConstantImm() const {
137 const MCExpr *Val = getImm();
138 return static_cast<const MCConstantExpr *>(Val)->getValue();
139 }
140
141 bool isSImm12() const {
142 return (isConstantImm() && isInt<12>(getConstantImm()));
143 }
144
145 /// getStartLoc - Gets location of the first token of this operand
146 SMLoc getStartLoc() const override { return StartLoc; }
147 /// getEndLoc - Gets location of the last token of this operand
148 SMLoc getEndLoc() const override { return EndLoc; }
149
150 unsigned getReg() const override {
151 assert(Kind == Register && "Invalid type access!");
152 return Reg.RegNum;
153 }
154
155 const MCExpr *getImm() const {
156 assert(Kind == Immediate && "Invalid type access!");
157 return Imm.Val;
158 }
159
160 StringRef getToken() const {
161 assert(Kind == Token && "Invalid type access!");
162 return Tok;
163 }
164
165 void print(raw_ostream &OS) const override {
166 switch (Kind) {
167 case Immediate:
168 OS << *getImm();
169 break;
170 case Register:
171 OS << "<register x";
172 OS << getReg() << ">";
173 break;
174 case Token:
175 OS << "'" << getToken() << "'";
176 break;
177 }
178 }
179
180 void addExpr(MCInst &Inst, const MCExpr *Expr) const {
181 assert(Expr && "Expr shouldn't be null!");
182
183 if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
184 Inst.addOperand(MCOperand::createImm(CE->getValue()));
185 else
187 }
188
189 // Used by the TableGen Code
190 void addRegOperands(MCInst &Inst, unsigned N) const {
191 assert(N == 1 && "Invalid number of operands!");
193 }
194
195 void addImmOperands(MCInst &Inst, unsigned N) const {
196 assert(N == 1 && "Invalid number of operands!");
197 addExpr(Inst, getImm());
198 }
199
200 static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
201 auto Op = std::make_unique<BPFOperand>(Token);
202 Op->Tok = Str;
203 Op->StartLoc = S;
204 Op->EndLoc = S;
205 return Op;
206 }
207
208 static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
209 SMLoc E) {
210 auto Op = std::make_unique<BPFOperand>(Register);
211 Op->Reg.RegNum = RegNo;
212 Op->StartLoc = S;
213 Op->EndLoc = E;
214 return Op;
215 }
216
217 static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
218 SMLoc E) {
219 auto Op = std::make_unique<BPFOperand>(Immediate);
220 Op->Imm.Val = Val;
221 Op->StartLoc = S;
222 Op->EndLoc = E;
223 return Op;
224 }
225
226 // Identifiers that can be used at the start of a statment.
227 static bool isValidIdAtStart(StringRef Name) {
228 return StringSwitch<bool>(Name.lower())
229 .Case("if", true)
230 .Case("call", true)
231 .Case("goto", true)
232 .Case("*", true)
233 .Case("exit", true)
234 .Case("lock", true)
235 .Case("ld_pseudo", true)
236 .Default(false);
237 }
238
239 // Identifiers that can be used in the middle of a statment.
240 static bool isValidIdInMiddle(StringRef Name) {
241 return StringSwitch<bool>(Name.lower())
242 .Case("u64", true)
243 .Case("u32", true)
244 .Case("u16", true)
245 .Case("u8", true)
246 .Case("be64", true)
247 .Case("be32", true)
248 .Case("be16", true)
249 .Case("le64", true)
250 .Case("le32", true)
251 .Case("le16", true)
252 .Case("goto", true)
253 .Case("ll", true)
254 .Case("skb", true)
255 .Case("s", true)
256 .Default(false);
257 }
258};
259} // end anonymous namespace.
260
261#define GET_REGISTER_MATCHER
262#define GET_MATCHER_IMPLEMENTATION
263#include "BPFGenAsmMatcher.inc"
264
265bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
266
267 if (Operands.size() == 4) {
268 // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
269 // reg1 must be the same as reg2
270 BPFOperand &Op0 = (BPFOperand &)*Operands[0];
271 BPFOperand &Op1 = (BPFOperand &)*Operands[1];
272 BPFOperand &Op2 = (BPFOperand &)*Operands[2];
273 BPFOperand &Op3 = (BPFOperand &)*Operands[3];
274 if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
275 && Op1.getToken() == "="
276 && (Op2.getToken() == "-" || Op2.getToken() == "be16"
277 || Op2.getToken() == "be32" || Op2.getToken() == "be64"
278 || Op2.getToken() == "le16" || Op2.getToken() == "le32"
279 || Op2.getToken() == "le64")
280 && Op0.getReg() != Op3.getReg())
281 return true;
282 }
283
284 return false;
285}
286
287bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
290 bool MatchingInlineAsm) {
291 MCInst Inst;
292 SMLoc ErrorLoc;
293
294 if (PreMatchCheck(Operands))
295 return Error(IDLoc, "additional inst constraint not met");
296
297 switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
298 default:
299 break;
300 case Match_Success:
301 Inst.setLoc(IDLoc);
302 Out.emitInstruction(Inst, getSTI());
303 return false;
304 case Match_MissingFeature:
305 return Error(IDLoc, "instruction use requires an option to be enabled");
306 case Match_MnemonicFail:
307 return Error(IDLoc, "unrecognized instruction mnemonic");
308 case Match_InvalidOperand:
309 ErrorLoc = IDLoc;
310
311 if (ErrorInfo != ~0U) {
312 if (ErrorInfo >= Operands.size())
313 return Error(ErrorLoc, "too few operands for instruction");
314
315 ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
316
317 if (ErrorLoc == SMLoc())
318 ErrorLoc = IDLoc;
319 }
320
321 return Error(ErrorLoc, "invalid operand for instruction");
322 }
323
324 llvm_unreachable("Unknown match type detected!");
325}
326
327bool BPFAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
328 SMLoc &EndLoc) {
329 if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
330 return Error(StartLoc, "invalid register name");
331 return false;
332}
333
/// tryParseRegister - Inspect the current token as a candidate register name.
///
/// Visible effects: StartLoc and EndLoc are set to the current token's start
/// and end locations, RegNo is set to 0, and the token's identifier is passed
/// to MatchRegisterName.
///
/// NOTE(review): the return statements are not present in this listing (one
/// is expected after the Lex() call and one before the closing brace), so the
/// result of each path cannot be documented from here - confirm against the
/// upstream file.
334OperandMatchResultTy BPFAsmParser::tryParseRegister(MCRegister &RegNo,
335 SMLoc &StartLoc,
336 SMLoc &EndLoc) {
337 const AsmToken &Tok = getParser().getTok();
338 StartLoc = Tok.getLoc();
339 EndLoc = Tok.getEndLoc();
340 RegNo = 0;
341 StringRef Name = getLexer().getTok().getIdentifier();
342
// NOTE(review): the token is consumed when MatchRegisterName(Name) yields 0.
// Elsewhere in this file a 0 result is treated as "not a register", and RegNo
// is never assigned a matched value here - confirm this is intended.
343 if (!MatchRegisterName(Name)) {
344 getParser().Lex(); // Eat identifier token.
346 }
347
349}
350
/// parseOperandAsOperator - Try to consume the current token as an operator
/// or keyword and append it to Operands as one or two Token operands.
///
/// Identifier tokens are tested with BPFOperand::isValidIdInMiddle(). The
/// punctuation kinds in the first case group are pushed as a single token
/// holding the token's text. The second case group splits the token's text
/// into two one-character tokens.
///
/// NOTE(review): the return-type line, every return statement and some case
/// labels are absent from this listing (the embedded line numbers skip), so
/// the result codes are not documented here - confirm against the upstream
/// file.
352BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
// Location of the token under examination; every operand created below uses
// it as its location.
353 SMLoc S = getLoc();
354
355 if (getLexer().getKind() == AsmToken::Identifier) {
356 StringRef Name = getLexer().getTok().getIdentifier();
357
// Accepted identifiers are consumed and pushed as a Token operand.
358 if (BPFOperand::isValidIdInMiddle(Name)) {
359 getLexer().Lex();
360 Operands.push_back(BPFOperand::createToken(Name, S));
362 }
363
365 }
366
367 switch (getLexer().getKind()) {
368 case AsmToken::Minus:
369 case AsmToken::Plus: {
// A sign followed by an integer is special-cased; peekTok() looks one token
// ahead without consuming. NOTE(review): the statement guarded by this `if`
// is missing from the listing; presumably the sign is left for immediate
// parsing - confirm.
370 if (getLexer().peekTok().is(AsmToken::Integer))
372 [[fallthrough]];
373 }
374
375 case AsmToken::Equal:
377 case AsmToken::Less:
378 case AsmToken::Pipe:
379 case AsmToken::Star:
380 case AsmToken::LParen:
381 case AsmToken::RParen:
382 case AsmToken::LBrac:
383 case AsmToken::RBrac:
384 case AsmToken::Slash:
385 case AsmToken::Amp:
387 case AsmToken::Caret: {
// Consume the token and push its text as one Token operand.
388 StringRef Name = getLexer().getTok().getString();
389 getLexer().Lex();
390 Operands.push_back(BPFOperand::createToken(Name, S));
391
393 }
394
400 case AsmToken::LessLess: {
// Push the first and the second character of the token text as two separate
// Token operands (both located at S), then consume the token.
401 Operands.push_back(BPFOperand::createToken(
402 getLexer().getTok().getString().substr(0, 1), S));
403 Operands.push_back(BPFOperand::createToken(
404 getLexer().getTok().getString().substr(1, 1), S));
405 getLexer().Lex();
406
408 }
409
410 default:
411 break;
412 }
413
415}
416
/// parseRegister - Try to consume the current token as a register operand.
///
/// The token's identifier is looked up with MatchRegisterName; the visible
/// tail of the function consumes the token and appends a Register operand
/// built from RegNo, S and E to Operands.
///
/// NOTE(review): several lines are absent from this listing - the definition
/// of E, the statement under `default:`, the case label that precedes the
/// lookup, the statement guarded by `RegNo == 0`, and the final return - so
/// the result codes are not documented here; confirm against the upstream
/// file.
417OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
// Location of the current token; used as the operand's start location.
418 SMLoc S = getLoc();
420
421 switch (getLexer().getKind()) {
422 default:
425 StringRef Name = getLexer().getTok().getIdentifier();
426 unsigned RegNo = MatchRegisterName(Name);
427
428 if (RegNo == 0)
430
431 getLexer().Lex();
432 Operands.push_back(BPFOperand::createReg(RegNo, S, E));
433 }
435}
436
/// parseImmediate - Try to parse an expression at the current token and
/// append it to Operands as an Immediate operand.
///
/// A switch on the current token kind runs first; the kinds listed before
/// `break` proceed to getParser().parseExpression().
///
/// NOTE(review): the statement under `default:`, some case labels, the
/// statement guarded by the parseExpression() check, the definition of E and
/// the final return are absent from this listing, so the result codes are not
/// documented here - confirm against the upstream file.
437OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
438 switch (getLexer().getKind()) {
439 default:
441 case AsmToken::LParen:
442 case AsmToken::Minus:
443 case AsmToken::Plus:
445 case AsmToken::String:
447 break;
448 }
449
450 const MCExpr *IdVal;
// Location of the first token of the expression; captured before parsing.
451 SMLoc S = getLoc();
452
453 if (getParser().parseExpression(IdVal))
455
457 Operands.push_back(BPFOperand::createImm(IdVal, S, E));
458
460}
461
462/// ParseInstruction - Parse an BPF instruction which is in BPF verifier
463/// format.
464bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
465 SMLoc NameLoc, OperandVector &Operands) {
466 // The first operand could be either register or actually an operator.
467 unsigned RegNo = MatchRegisterName(Name);
468
469 if (RegNo != 0) {
470 SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
471 Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
472 } else if (BPFOperand::isValidIdAtStart (Name))
473 Operands.push_back(BPFOperand::createToken(Name, NameLoc));
474 else
475 return Error(NameLoc, "invalid register/token name");
476
477 while (!getLexer().is(AsmToken::EndOfStatement)) {
478 // Attempt to parse token as operator
479 if (parseOperandAsOperator(Operands) == MatchOperand_Success)
480 continue;
481
482 // Attempt to parse token as register
483 if (parseRegister(Operands) == MatchOperand_Success)
484 continue;
485
486 // Attempt to parse token as an immediate
488 SMLoc Loc = getLexer().getLoc();
489 return Error(Loc, "unexpected token");
490 }
491 }
492
493 if (getLexer().isNot(AsmToken::EndOfStatement)) {
494 SMLoc Loc = getLexer().getLoc();
495
496 getParser().eatToEndOfStatement();
497
498 return Error(Loc, "unexpected token");
499 }
500
501 // Consume the EndOfStatement.
502 getParser().Lex();
503 return false;
504}
505
506bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
507
512}
static unsigned MatchRegisterName(StringRef Name)
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser()
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:127
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static LVOptions Options
Definition: LVOptions.cpp:25
mir Rename Register Operands
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
This file contains some templates that are useful if you are working with the STL at all.
static StringRef substr(StringRef Str, uint64_t Len)
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes)
Target independent representation for an assembler token.
Definition: MCAsmMacro.h:21
SMLoc getLoc() const
Definition: MCAsmLexer.cpp:26
SMLoc getEndLoc() const
Definition: MCAsmLexer.cpp:30
Base class for user error types.
Definition: Error.h:348
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
Generic assembler parser interface, for use by target specific assembly parsers.
Definition: MCAsmParser.h:123
const AsmToken & getTok() const
Get the current AsmToken from the stream.
Definition: MCAsmParser.cpp:40
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setLoc(SMLoc loc)
Definition: MCInst.h:203
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:26
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
Streaming machine code generation interface.
Definition: MCStreamer.h:212
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
MCTargetAsmParser - Generic interface to target specific assembly parsers.
virtual bool ParseDirective(AsmToken DirectiveID)=0
ParseDirective - Parse a target specific assembler directive.
virtual bool equalIsAsmAssignment()
virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
virtual bool starIsStartOfStatement()
void setAvailableFeatures(const FeatureBitset &Value)
virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands)=0
ParseInstruction - Parse one assembly instruction.
virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm)=0
MatchAndEmitInstruction - Recognize a series of operands of a parsed instruction as an actual MCInst ...
virtual OperandMatchResultTy tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc)=0
tryParseRegister - parse one register if possible
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents a location in source code.
Definition: SMLoc.h:23
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:36
const char * getPointer() const
Definition: SMLoc.h:34
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CE
Windows NT (Windows on ARM)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:131
@ MatchOperand_NoMatch
@ MatchOperand_ParseFail
@ MatchOperand_Success
Target & getTheBPFleTarget()
Target & getTheBPFbeTarget()
Target & getTheBPFTarget()
#define N
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...