1//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/BPFMCTargetDesc.h"
10#include "TargetInfo/BPFTargetInfo.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/StringSwitch.h"
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCExpr.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCParser/MCAsmLexer.h"
17#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
18#include "llvm/MC/MCParser/MCTargetAsmParser.h"
19#include "llvm/MC/MCRegisterInfo.h"
20#include "llvm/MC/MCStreamer.h"
21#include "llvm/MC/MCSubtargetInfo.h"
22#include "llvm/Support/Casting.h"
23#include "llvm/Support/TargetRegistry.h"
24
25using namespace llvm;
26
27namespace {
28struct BPFOperand;
29
/// Target assembly parser for BPF. It overrides the MCTargetAsmParser hooks
/// declared below and pulls the TableGen-generated matcher declarations in
/// from BPFGenAsmMatcher.inc.
class BPFAsmParser : public MCTargetAsmParser {

  /// Location of the token the parser is currently positioned on.
  SMLoc getLoc() const { return getParser().getTok().getLoc(); }

  /// Additional operand check performed before the generated matcher runs.
  bool PreMatchCheck(OperandVector &Operands);

  /// Match the parsed operands against the instruction tables and emit the
  /// resulting instruction to \p Out.
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  /// Register-parsing hooks; ParseRegister reports a diagnostic on failure
  /// while tryParseRegister returns an OperandMatchResultTy.
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;

  /// Parse one statement, whose first token is \p Name, into \p Operands.
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  /// Hook for target-specific assembler directives.
  bool ParseDirective(AsmToken DirectiveID) override;

  // "=" is used as the assignment operator inside assembly statements, so it
  // can't be used for symbol assignment.
  bool equalIsAsmAssignment() override { return false; }
  // "*" is used for dereferencing memory, so it can be the start of a
  // statement.
  bool starIsStartOfStatement() override { return true; }

  // Declarations emitted by TableGen for the instruction matcher.
#define GET_ASSEMBLER_HEADER
#include "BPFGenAsmMatcher.inc"

  // Operand parsers; each appends to Operands when it reports success.
  OperandMatchResultTy parseImmediate(OperandVector &Operands);
  OperandMatchResultTy parseRegister(OperandVector &Operands);
  OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);

public:
  /// Target-specific match results, numbered from
  /// FIRST_TARGET_MATCH_RESULT_TY; the remaining enumerators come from the
  /// generated operand diagnostic types.
  enum BPFMatchResultTy {
    Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "BPFGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
  };

  /// Construct the parser and enable the features computed from the
  /// subtarget's feature bits. \p Parser is not used by this constructor.
  BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
               const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII) {
    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }
};
78
79/// BPFOperand - Instances of this class represent a parsed machine
80/// instruction
81struct BPFOperand : public MCParsedAsmOperand {
82
83  enum KindTy {
84    Token,
85    Register,
86    Immediate,
87  } Kind;
88
89  struct RegOp {
90    unsigned RegNum;
91  };
92
93  struct ImmOp {
94    const MCExpr *Val;
95  };
96
97  SMLoc StartLoc, EndLoc;
98  union {
99    StringRef Tok;
100    RegOp Reg;
101    ImmOp Imm;
102  };
103
104  BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
105
106public:
107  BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
108    Kind = o.Kind;
109    StartLoc = o.StartLoc;
110    EndLoc = o.EndLoc;
111
112    switch (Kind) {
113    case Register:
114      Reg = o.Reg;
115      break;
116    case Immediate:
117      Imm = o.Imm;
118      break;
119    case Token:
120      Tok = o.Tok;
121      break;
122    }
123  }
124
125  bool isToken() const override { return Kind == Token; }
126  bool isReg() const override { return Kind == Register; }
127  bool isImm() const override { return Kind == Immediate; }
128  bool isMem() const override { return false; }
129
130  bool isConstantImm() const {
131    return isImm() && isa<MCConstantExpr>(getImm());
132  }
133
134  int64_t getConstantImm() const {
135    const MCExpr *Val = getImm();
136    return static_cast<const MCConstantExpr *>(Val)->getValue();
137  }
138
139  bool isSImm12() const {
140    return (isConstantImm() && isInt<12>(getConstantImm()));
141  }
142
143  /// getStartLoc - Gets location of the first token of this operand
144  SMLoc getStartLoc() const override { return StartLoc; }
145  /// getEndLoc - Gets location of the last token of this operand
146  SMLoc getEndLoc() const override { return EndLoc; }
147
148  unsigned getReg() const override {
149    assert(Kind == Register && "Invalid type access!");
150    return Reg.RegNum;
151  }
152
153  const MCExpr *getImm() const {
154    assert(Kind == Immediate && "Invalid type access!");
155    return Imm.Val;
156  }
157
158  StringRef getToken() const {
159    assert(Kind == Token && "Invalid type access!");
160    return Tok;
161  }
162
163  void print(raw_ostream &OS) const override {
164    switch (Kind) {
165    case Immediate:
166      OS << *getImm();
167      break;
168    case Register:
169      OS << "<register x";
170      OS << getReg() << ">";
171      break;
172    case Token:
173      OS << "'" << getToken() << "'";
174      break;
175    }
176  }
177
178  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
179    assert(Expr && "Expr shouldn't be null!");
180
181    if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
182      Inst.addOperand(MCOperand::createImm(CE->getValue()));
183    else
184      Inst.addOperand(MCOperand::createExpr(Expr));
185  }
186
187  // Used by the TableGen Code
188  void addRegOperands(MCInst &Inst, unsigned N) const {
189    assert(N == 1 && "Invalid number of operands!");
190    Inst.addOperand(MCOperand::createReg(getReg()));
191  }
192
193  void addImmOperands(MCInst &Inst, unsigned N) const {
194    assert(N == 1 && "Invalid number of operands!");
195    addExpr(Inst, getImm());
196  }
197
198  static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
199    auto Op = std::make_unique<BPFOperand>(Token);
200    Op->Tok = Str;
201    Op->StartLoc = S;
202    Op->EndLoc = S;
203    return Op;
204  }
205
206  static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
207                                               SMLoc E) {
208    auto Op = std::make_unique<BPFOperand>(Register);
209    Op->Reg.RegNum = RegNo;
210    Op->StartLoc = S;
211    Op->EndLoc = E;
212    return Op;
213  }
214
215  static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
216                                               SMLoc E) {
217    auto Op = std::make_unique<BPFOperand>(Immediate);
218    Op->Imm.Val = Val;
219    Op->StartLoc = S;
220    Op->EndLoc = E;
221    return Op;
222  }
223
224  // Identifiers that can be used at the start of a statment.
225  static bool isValidIdAtStart(StringRef Name) {
226    return StringSwitch<bool>(Name.lower())
227        .Case("if", true)
228        .Case("call", true)
229        .Case("goto", true)
230        .Case("*", true)
231        .Case("exit", true)
232        .Case("lock", true)
233        .Case("ld_pseudo", true)
234        .Default(false);
235  }
236
237  // Identifiers that can be used in the middle of a statment.
238  static bool isValidIdInMiddle(StringRef Name) {
239    return StringSwitch<bool>(Name.lower())
240        .Case("u64", true)
241        .Case("u32", true)
242        .Case("u16", true)
243        .Case("u8", true)
244        .Case("be64", true)
245        .Case("be32", true)
246        .Case("be16", true)
247        .Case("le64", true)
248        .Case("le32", true)
249        .Case("le16", true)
250        .Case("goto", true)
251        .Case("ll", true)
252        .Case("skb", true)
253        .Case("s", true)
254        .Default(false);
255  }
256};
257} // end anonymous namespace.
258
259#define GET_REGISTER_MATCHER
260#define GET_MATCHER_IMPLEMENTATION
261#include "BPFGenAsmMatcher.inc"
262
263bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
264
265  if (Operands.size() == 4) {
266    // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
267    // reg1 must be the same as reg2
268    BPFOperand &Op0 = (BPFOperand &)*Operands[0];
269    BPFOperand &Op1 = (BPFOperand &)*Operands[1];
270    BPFOperand &Op2 = (BPFOperand &)*Operands[2];
271    BPFOperand &Op3 = (BPFOperand &)*Operands[3];
272    if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
273        && Op1.getToken() == "="
274        && (Op2.getToken() == "-" || Op2.getToken() == "be16"
275            || Op2.getToken() == "be32" || Op2.getToken() == "be64"
276            || Op2.getToken() == "le16" || Op2.getToken() == "le32"
277            || Op2.getToken() == "le64")
278        && Op0.getReg() != Op3.getReg())
279      return true;
280  }
281
282  return false;
283}
284
285bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
286                                           OperandVector &Operands,
287                                           MCStreamer &Out, uint64_t &ErrorInfo,
288                                           bool MatchingInlineAsm) {
289  MCInst Inst;
290  SMLoc ErrorLoc;
291
292  if (PreMatchCheck(Operands))
293    return Error(IDLoc, "additional inst constraint not met");
294
295  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
296  default:
297    break;
298  case Match_Success:
299    Inst.setLoc(IDLoc);
300    Out.emitInstruction(Inst, getSTI());
301    return false;
302  case Match_MissingFeature:
303    return Error(IDLoc, "instruction use requires an option to be enabled");
304  case Match_MnemonicFail:
305    return Error(IDLoc, "unrecognized instruction mnemonic");
306  case Match_InvalidOperand:
307    ErrorLoc = IDLoc;
308
309    if (ErrorInfo != ~0U) {
310      if (ErrorInfo >= Operands.size())
311        return Error(ErrorLoc, "too few operands for instruction");
312
313      ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
314
315      if (ErrorLoc == SMLoc())
316        ErrorLoc = IDLoc;
317    }
318
319    return Error(ErrorLoc, "invalid operand for instruction");
320  }
321
322  llvm_unreachable("Unknown match type detected!");
323}
324
325bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
326                                 SMLoc &EndLoc) {
327  if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
328    return Error(StartLoc, "invalid register name");
329  return false;
330}
331
332OperandMatchResultTy BPFAsmParser::tryParseRegister(unsigned &RegNo,
333                                                    SMLoc &StartLoc,
334                                                    SMLoc &EndLoc) {
335  const AsmToken &Tok = getParser().getTok();
336  StartLoc = Tok.getLoc();
337  EndLoc = Tok.getEndLoc();
338  RegNo = 0;
339  StringRef Name = getLexer().getTok().getIdentifier();
340
341  if (!MatchRegisterName(Name)) {
342    getParser().Lex(); // Eat identifier token.
343    return MatchOperand_Success;
344  }
345
346  return MatchOperand_NoMatch;
347}
348
349OperandMatchResultTy
350BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
351  SMLoc S = getLoc();
352
353  if (getLexer().getKind() == AsmToken::Identifier) {
354    StringRef Name = getLexer().getTok().getIdentifier();
355
356    if (BPFOperand::isValidIdInMiddle(Name)) {
357      getLexer().Lex();
358      Operands.push_back(BPFOperand::createToken(Name, S));
359      return MatchOperand_Success;
360    }
361
362    return MatchOperand_NoMatch;
363  }
364
365  switch (getLexer().getKind()) {
366  case AsmToken::Minus:
367  case AsmToken::Plus: {
368    if (getLexer().peekTok().is(AsmToken::Integer))
369      return MatchOperand_NoMatch;
370    LLVM_FALLTHROUGH;
371  }
372
373  case AsmToken::Equal:
374  case AsmToken::Greater:
375  case AsmToken::Less:
376  case AsmToken::Pipe:
377  case AsmToken::Star:
378  case AsmToken::LParen:
379  case AsmToken::RParen:
380  case AsmToken::LBrac:
381  case AsmToken::RBrac:
382  case AsmToken::Slash:
383  case AsmToken::Amp:
384  case AsmToken::Percent:
385  case AsmToken::Caret: {
386    StringRef Name = getLexer().getTok().getString();
387    getLexer().Lex();
388    Operands.push_back(BPFOperand::createToken(Name, S));
389
390    return MatchOperand_Success;
391  }
392
393  case AsmToken::EqualEqual:
394  case AsmToken::ExclaimEqual:
395  case AsmToken::GreaterEqual:
396  case AsmToken::GreaterGreater:
397  case AsmToken::LessEqual:
398  case AsmToken::LessLess: {
399    Operands.push_back(BPFOperand::createToken(
400        getLexer().getTok().getString().substr(0, 1), S));
401    Operands.push_back(BPFOperand::createToken(
402        getLexer().getTok().getString().substr(1, 1), S));
403    getLexer().Lex();
404
405    return MatchOperand_Success;
406  }
407
408  default:
409    break;
410  }
411
412  return MatchOperand_NoMatch;
413}
414
415OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
416  SMLoc S = getLoc();
417  SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
418
419  switch (getLexer().getKind()) {
420  default:
421    return MatchOperand_NoMatch;
422  case AsmToken::Identifier:
423    StringRef Name = getLexer().getTok().getIdentifier();
424    unsigned RegNo = MatchRegisterName(Name);
425
426    if (RegNo == 0)
427      return MatchOperand_NoMatch;
428
429    getLexer().Lex();
430    Operands.push_back(BPFOperand::createReg(RegNo, S, E));
431  }
432  return MatchOperand_Success;
433}
434
435OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
436  switch (getLexer().getKind()) {
437  default:
438    return MatchOperand_NoMatch;
439  case AsmToken::LParen:
440  case AsmToken::Minus:
441  case AsmToken::Plus:
442  case AsmToken::Integer:
443  case AsmToken::String:
444  case AsmToken::Identifier:
445    break;
446  }
447
448  const MCExpr *IdVal;
449  SMLoc S = getLoc();
450
451  if (getParser().parseExpression(IdVal))
452    return MatchOperand_ParseFail;
453
454  SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
455  Operands.push_back(BPFOperand::createImm(IdVal, S, E));
456
457  return MatchOperand_Success;
458}
459
460/// ParseInstruction - Parse an BPF instruction which is in BPF verifier
461/// format.
462bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
463                                    SMLoc NameLoc, OperandVector &Operands) {
464  // The first operand could be either register or actually an operator.
465  unsigned RegNo = MatchRegisterName(Name);
466
467  if (RegNo != 0) {
468    SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
469    Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
470  } else if (BPFOperand::isValidIdAtStart (Name))
471    Operands.push_back(BPFOperand::createToken(Name, NameLoc));
472  else
473    return Error(NameLoc, "invalid register/token name");
474
475  while (!getLexer().is(AsmToken::EndOfStatement)) {
476    // Attempt to parse token as operator
477    if (parseOperandAsOperator(Operands) == MatchOperand_Success)
478      continue;
479
480    // Attempt to parse token as register
481    if (parseRegister(Operands) == MatchOperand_Success)
482      continue;
483
484    // Attempt to parse token as an immediate
485    if (parseImmediate(Operands) != MatchOperand_Success) {
486      SMLoc Loc = getLexer().getLoc();
487      return Error(Loc, "unexpected token");
488    }
489  }
490
491  if (getLexer().isNot(AsmToken::EndOfStatement)) {
492    SMLoc Loc = getLexer().getLoc();
493
494    getParser().eatToEndOfStatement();
495
496    return Error(Loc, "unexpected token");
497  }
498
499  // Consume the EndOfStatement.
500  getParser().Lex();
501  return false;
502}
503
504bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
505
506extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
507  RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
508  RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
509  RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
510}
511