1//==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file is part of the WebAssembly Assembler.
11///
12/// It contains code to translate a parsed .s file into MCInsts.
13///
14//===----------------------------------------------------------------------===//
15
16#include "AsmParser/WebAssemblyAsmTypeCheck.h"
17#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
18#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
19#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
20#include "TargetInfo/WebAssemblyTargetInfo.h"
21#include "WebAssembly.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/MC/MCInst.h"
25#include "llvm/MC/MCInstrInfo.h"
26#include "llvm/MC/MCParser/MCAsmLexer.h"
27#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28#include "llvm/MC/MCParser/MCTargetAsmParser.h"
29#include "llvm/MC/MCSectionWasm.h"
30#include "llvm/MC/MCStreamer.h"
31#include "llvm/MC/MCSubtargetInfo.h"
32#include "llvm/MC/MCSymbol.h"
33#include "llvm/MC/MCSymbolWasm.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/SourceMgr.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "wasm-asm-parser"
40
41static const char *getSubtargetFeatureName(uint64_t Val);
42
43namespace {
44
45/// WebAssemblyOperand - Instances of this class represent the operands in a
46/// parsed Wasm machine instruction.
47struct WebAssemblyOperand : public MCParsedAsmOperand {
48  enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
49
50  SMLoc StartLoc, EndLoc;
51
52  struct TokOp {
53    StringRef Tok;
54  };
55
56  struct IntOp {
57    int64_t Val;
58  };
59
60  struct FltOp {
61    double Val;
62  };
63
64  struct SymOp {
65    const MCExpr *Exp;
66  };
67
68  struct BrLOp {
69    std::vector<unsigned> List;
70  };
71
72  union {
73    struct TokOp Tok;
74    struct IntOp Int;
75    struct FltOp Flt;
76    struct SymOp Sym;
77    struct BrLOp BrL;
78  };
79
80  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
81      : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
82  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
83      : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
84  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
85      : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
86  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
87      : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
88  WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
89      : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
90
91  ~WebAssemblyOperand() {
92    if (isBrList())
93      BrL.~BrLOp();
94  }
95
96  bool isToken() const override { return Kind == Token; }
97  bool isImm() const override { return Kind == Integer || Kind == Symbol; }
98  bool isFPImm() const { return Kind == Float; }
99  bool isMem() const override { return false; }
100  bool isReg() const override { return false; }
101  bool isBrList() const { return Kind == BrList; }
102
103  unsigned getReg() const override {
104    llvm_unreachable("Assembly inspects a register operand");
105    return 0;
106  }
107
108  StringRef getToken() const {
109    assert(isToken());
110    return Tok.Tok;
111  }
112
113  SMLoc getStartLoc() const override { return StartLoc; }
114  SMLoc getEndLoc() const override { return EndLoc; }
115
116  void addRegOperands(MCInst &, unsigned) const {
117    // Required by the assembly matcher.
118    llvm_unreachable("Assembly matcher creates register operands");
119  }
120
121  void addImmOperands(MCInst &Inst, unsigned N) const {
122    assert(N == 1 && "Invalid number of operands!");
123    if (Kind == Integer)
124      Inst.addOperand(MCOperand::createImm(Int.Val));
125    else if (Kind == Symbol)
126      Inst.addOperand(MCOperand::createExpr(Sym.Exp));
127    else
128      llvm_unreachable("Should be integer immediate or symbol!");
129  }
130
131  void addFPImmf32Operands(MCInst &Inst, unsigned N) const {
132    assert(N == 1 && "Invalid number of operands!");
133    if (Kind == Float)
134      Inst.addOperand(
135          MCOperand::createSFPImm(bit_cast<uint32_t>(float(Flt.Val))));
136    else
137      llvm_unreachable("Should be float immediate!");
138  }
139
140  void addFPImmf64Operands(MCInst &Inst, unsigned N) const {
141    assert(N == 1 && "Invalid number of operands!");
142    if (Kind == Float)
143      Inst.addOperand(MCOperand::createDFPImm(bit_cast<uint64_t>(Flt.Val)));
144    else
145      llvm_unreachable("Should be float immediate!");
146  }
147
148  void addBrListOperands(MCInst &Inst, unsigned N) const {
149    assert(N == 1 && isBrList() && "Invalid BrList!");
150    for (auto Br : BrL.List)
151      Inst.addOperand(MCOperand::createImm(Br));
152  }
153
154  void print(raw_ostream &OS) const override {
155    switch (Kind) {
156    case Token:
157      OS << "Tok:" << Tok.Tok;
158      break;
159    case Integer:
160      OS << "Int:" << Int.Val;
161      break;
162    case Float:
163      OS << "Flt:" << Flt.Val;
164      break;
165    case Symbol:
166      OS << "Sym:" << Sym.Exp;
167      break;
168    case BrList:
169      OS << "BrList:" << BrL.List.size();
170      break;
171    }
172  }
173};
174
175// Perhaps this should go somewhere common.
176static wasm::WasmLimits DefaultLimits() {
177  return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
178}
179
180static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
181                                                    const StringRef &Name) {
182  MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
183  if (Sym) {
184    if (!Sym->isFunctionTable())
185      Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
186  } else {
187    Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
188    Sym->setFunctionTable();
189    // The default function table is synthesized by the linker.
190    Sym->setUndefined();
191  }
192  return Sym;
193}
194
195class WebAssemblyAsmParser final : public MCTargetAsmParser {
196  MCAsmParser &Parser;
197  MCAsmLexer &Lexer;
198
199  // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
200  std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
201  std::vector<std::unique_ptr<std::string>> Names;
202
  // The order of labels, directives and instructions in a .s file has no
  // syntactic enforcement. This class is a callback from the actual parser,
  // and yet we have to feed data to the streamer in a very particular
  // order to ensure a correct binary encoding that matches the regular backend
  // (the streamer does not enforce this). This "state machine" enum helps
  // guarantee that correct order.
  enum ParserState {
    FileStart, // Initial value of CurrentState.
    FunctionLabel,
    FunctionStart,
    FunctionLocals,
    Instructions,
    EndFunction, // Entered when an `end_function` instruction is parsed.
    DataSection, // Entered by CheckDataSection().
  } CurrentState = FileStart;
218
  // For ensuring blocks are properly nested.
  enum NestingType {
    Function,
    Block,
    Loop,
    Try,
    CatchAll,
    If,
    Else,
    // Not a real construct: used as the default for pop()'s second accepted
    // type, and rejected by nestingString().
    Undefined,
  };
  // One currently-open construct: its kind plus the signature recorded for
  // it, which pop() hands back to the type checker when the construct closes.
  struct Nested {
    NestingType NT;
    wasm::WasmSignature Sig;
  };
  // Open constructs, innermost last.
  std::vector<Nested> NestingStack;
235
236  MCSymbolWasm *DefaultFunctionTable = nullptr;
237  MCSymbol *LastFunctionLabel = nullptr;
238
239  bool is64;
240
241  WebAssemblyAsmTypeCheck TC;
242  // Don't type check if -no-type-check was set.
243  bool SkipTypeCheck;
244
245public:
246  WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
247                       const MCInstrInfo &MII, const MCTargetOptions &Options)
248      : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
249        Lexer(Parser.getLexer()), is64(STI.getTargetTriple().isArch64Bit()),
250        TC(Parser, MII, is64), SkipTypeCheck(Options.MCNoTypeCheck) {
251    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
252    // Don't type check if this is inline asm, since that is a naked sequence of
253    // instructions without a function/locals decl.
254    auto &SM = Parser.getSourceManager();
255    auto BufferName =
256        SM.getBufferInfo(SM.getMainFileID()).Buffer->getBufferIdentifier();
257    if (BufferName == "<inline asm>")
258      SkipTypeCheck = true;
259  }
260
261  void Initialize(MCAsmParser &Parser) override {
262    MCAsmParserExtension::Initialize(Parser);
263
264    DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
265        getContext(), "__indirect_function_table");
266    if (!STI->checkFeatures("+reference-types"))
267      DefaultFunctionTable->setOmitFromLinkingSection();
268  }
269
270#define GET_ASSEMBLER_HEADER
271#include "WebAssemblyGenAsmMatcher.inc"
272
273  // TODO: This is required to be implemented, but appears unused.
274  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override {
275    llvm_unreachable("parseRegister is not implemented.");
276  }
277  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
278                               SMLoc &EndLoc) override {
279    llvm_unreachable("tryParseRegister is not implemented.");
280  }
281
282  bool error(const Twine &Msg, const AsmToken &Tok) {
283    return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
284  }
285
286  bool error(const Twine &Msg, SMLoc Loc = SMLoc()) {
287    return Parser.Error(Loc.isValid() ? Loc : Lexer.getTok().getLoc(), Msg);
288  }
289
290  void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
291    Signatures.push_back(std::move(Sig));
292  }
293
294  StringRef storeName(StringRef Name) {
295    std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
296    Names.push_back(std::move(N));
297    return *Names.back();
298  }
299
300  std::pair<StringRef, StringRef> nestingString(NestingType NT) {
301    switch (NT) {
302    case Function:
303      return {"function", "end_function"};
304    case Block:
305      return {"block", "end_block"};
306    case Loop:
307      return {"loop", "end_loop"};
308    case Try:
309      return {"try", "end_try/delegate"};
310    case CatchAll:
311      return {"catch_all", "end_try"};
312    case If:
313      return {"if", "end_if"};
314    case Else:
315      return {"else", "end_if"};
316    default:
317      llvm_unreachable("unknown NestingType");
318    }
319  }
320
321  void push(NestingType NT, wasm::WasmSignature Sig = wasm::WasmSignature()) {
322    NestingStack.push_back({NT, Sig});
323  }
324
325  bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
326    if (NestingStack.empty())
327      return error(Twine("End of block construct with no start: ") + Ins);
328    auto Top = NestingStack.back();
329    if (Top.NT != NT1 && Top.NT != NT2)
330      return error(Twine("Block construct type mismatch, expected: ") +
331                   nestingString(Top.NT).second + ", instead got: " + Ins);
332    TC.setLastSig(Top.Sig);
333    NestingStack.pop_back();
334    return false;
335  }
336
337  // Pop a NestingType and push a new NestingType with the same signature. Used
338  // for if-else and try-catch(_all).
339  bool popAndPushWithSameSignature(StringRef Ins, NestingType PopNT,
340                                   NestingType PushNT) {
341    if (NestingStack.empty())
342      return error(Twine("End of block construct with no start: ") + Ins);
343    auto Sig = NestingStack.back().Sig;
344    if (pop(Ins, PopNT))
345      return true;
346    push(PushNT, Sig);
347    return false;
348  }
349
350  bool ensureEmptyNestingStack(SMLoc Loc = SMLoc()) {
351    auto Err = !NestingStack.empty();
352    while (!NestingStack.empty()) {
353      error(Twine("Unmatched block construct(s) at function end: ") +
354                nestingString(NestingStack.back().NT).first,
355            Loc);
356      NestingStack.pop_back();
357    }
358    return Err;
359  }
360
361  bool isNext(AsmToken::TokenKind Kind) {
362    auto Ok = Lexer.is(Kind);
363    if (Ok)
364      Parser.Lex();
365    return Ok;
366  }
367
368  bool expect(AsmToken::TokenKind Kind, const char *KindName) {
369    if (!isNext(Kind))
370      return error(std::string("Expected ") + KindName + ", instead got: ",
371                   Lexer.getTok());
372    return false;
373  }
374
375  StringRef expectIdent() {
376    if (!Lexer.is(AsmToken::Identifier)) {
377      error("Expected identifier, got: ", Lexer.getTok());
378      return StringRef();
379    }
380    auto Name = Lexer.getTok().getString();
381    Parser.Lex();
382    return Name;
383  }
384
385  bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
386    while (Lexer.is(AsmToken::Identifier)) {
387      auto Type = WebAssembly::parseType(Lexer.getTok().getString());
388      if (!Type)
389        return error("unknown type: ", Lexer.getTok());
390      Types.push_back(*Type);
391      Parser.Lex();
392      if (!isNext(AsmToken::Comma))
393        break;
394    }
395    return false;
396  }
397
398  void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
399    auto &Int = Lexer.getTok();
400    int64_t Val = Int.getIntVal();
401    if (IsNegative)
402      Val = -Val;
403    Operands.push_back(std::make_unique<WebAssemblyOperand>(
404        WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
405        WebAssemblyOperand::IntOp{Val}));
406    Parser.Lex();
407  }
408
409  bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
410    auto &Flt = Lexer.getTok();
411    double Val;
412    if (Flt.getString().getAsDouble(Val, false))
413      return error("Cannot parse real: ", Flt);
414    if (IsNegative)
415      Val = -Val;
416    Operands.push_back(std::make_unique<WebAssemblyOperand>(
417        WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
418        WebAssemblyOperand::FltOp{Val}));
419    Parser.Lex();
420    return false;
421  }
422
423  bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
424    if (Lexer.isNot(AsmToken::Identifier))
425      return true;
426    auto &Flt = Lexer.getTok();
427    auto S = Flt.getString();
428    double Val;
429    if (S.compare_insensitive("infinity") == 0) {
430      Val = std::numeric_limits<double>::infinity();
431    } else if (S.compare_insensitive("nan") == 0) {
432      Val = std::numeric_limits<double>::quiet_NaN();
433    } else {
434      return true;
435    }
436    if (IsNegative)
437      Val = -Val;
438    Operands.push_back(std::make_unique<WebAssemblyOperand>(
439        WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
440        WebAssemblyOperand::FltOp{Val}));
441    Parser.Lex();
442    return false;
443  }
444
445  bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
446    // FIXME: there is probably a cleaner way to do this.
447    auto IsLoadStore = InstName.contains(".load") ||
448                       InstName.contains(".store") ||
449                       InstName.contains("prefetch");
450    auto IsAtomic = InstName.contains("atomic.");
451    if (IsLoadStore || IsAtomic) {
452      // Parse load/store operands of the form: offset:p2align=align
453      if (IsLoadStore && isNext(AsmToken::Colon)) {
454        auto Id = expectIdent();
455        if (Id != "p2align")
456          return error("Expected p2align, instead got: " + Id);
457        if (expect(AsmToken::Equal, "="))
458          return true;
459        if (!Lexer.is(AsmToken::Integer))
460          return error("Expected integer constant");
461        parseSingleInteger(false, Operands);
462      } else {
463        // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
464        // index. We need to avoid parsing an extra alignment operand for the
465        // lane index.
466        auto IsLoadStoreLane = InstName.contains("_lane");
467        if (IsLoadStoreLane && Operands.size() == 4)
468          return false;
469        // Alignment not specified (or atomics, must use default alignment).
470        // We can't just call WebAssembly::GetDefaultP2Align since we don't have
471        // an opcode until after the assembly matcher, so set a default to fix
472        // up later.
473        auto Tok = Lexer.getTok();
474        Operands.push_back(std::make_unique<WebAssemblyOperand>(
475            WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
476            WebAssemblyOperand::IntOp{-1}));
477      }
478    }
479    return false;
480  }
481
482  void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
483                           WebAssembly::BlockType BT) {
484    if (BT != WebAssembly::BlockType::Void) {
485      wasm::WasmSignature Sig({static_cast<wasm::ValType>(BT)}, {});
486      TC.setLastSig(Sig);
487      NestingStack.back().Sig = Sig;
488    }
489    Operands.push_back(std::make_unique<WebAssemblyOperand>(
490        WebAssemblyOperand::Integer, NameLoc, NameLoc,
491        WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
492  }
493
494  bool parseLimits(wasm::WasmLimits *Limits) {
495    auto Tok = Lexer.getTok();
496    if (!Tok.is(AsmToken::Integer))
497      return error("Expected integer constant, instead got: ", Tok);
498    int64_t Val = Tok.getIntVal();
499    assert(Val >= 0);
500    Limits->Minimum = Val;
501    Parser.Lex();
502
503    if (isNext(AsmToken::Comma)) {
504      Limits->Flags |= wasm::WASM_LIMITS_FLAG_HAS_MAX;
505      auto Tok = Lexer.getTok();
506      if (!Tok.is(AsmToken::Integer))
507        return error("Expected integer constant, instead got: ", Tok);
508      int64_t Val = Tok.getIntVal();
509      assert(Val >= 0);
510      Limits->Maximum = Val;
511      Parser.Lex();
512    }
513    return false;
514  }
515
516  bool parseFunctionTableOperand(std::unique_ptr<WebAssemblyOperand> *Op) {
517    if (STI->checkFeatures("+reference-types")) {
518      // If the reference-types feature is enabled, there is an explicit table
519      // operand.  To allow the same assembly to be compiled with or without
520      // reference types, we allow the operand to be omitted, in which case we
521      // default to __indirect_function_table.
522      auto &Tok = Lexer.getTok();
523      if (Tok.is(AsmToken::Identifier)) {
524        auto *Sym =
525            GetOrCreateFunctionTableSymbol(getContext(), Tok.getString());
526        const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
527        *Op = std::make_unique<WebAssemblyOperand>(
528            WebAssemblyOperand::Symbol, Tok.getLoc(), Tok.getEndLoc(),
529            WebAssemblyOperand::SymOp{Val});
530        Parser.Lex();
531        return expect(AsmToken::Comma, ",");
532      } else {
533        const auto *Val =
534            MCSymbolRefExpr::create(DefaultFunctionTable, getContext());
535        *Op = std::make_unique<WebAssemblyOperand>(
536            WebAssemblyOperand::Symbol, SMLoc(), SMLoc(),
537            WebAssemblyOperand::SymOp{Val});
538        return false;
539      }
540    } else {
541      // For the MVP there is at most one table whose number is 0, but we can't
542      // write a table symbol or issue relocations.  Instead we just ensure the
543      // table is live and write a zero.
544      getStreamer().emitSymbolAttribute(DefaultFunctionTable, MCSA_NoDeadStrip);
545      *Op = std::make_unique<WebAssemblyOperand>(WebAssemblyOperand::Integer,
546                                                 SMLoc(), SMLoc(),
547                                                 WebAssemblyOperand::IntOp{0});
548      return false;
549    }
550  }
551
552  bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
553                        SMLoc NameLoc, OperandVector &Operands) override {
554    // Note: Name does NOT point into the sourcecode, but to a local, so
555    // use NameLoc instead.
556    Name = StringRef(NameLoc.getPointer(), Name.size());
557
558    // WebAssembly has instructions with / in them, which AsmLexer parses
559    // as separate tokens, so if we find such tokens immediately adjacent (no
560    // whitespace), expand the name to include them:
561    for (;;) {
562      auto &Sep = Lexer.getTok();
563      if (Sep.getLoc().getPointer() != Name.end() ||
564          Sep.getKind() != AsmToken::Slash)
565        break;
566      // Extend name with /
567      Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
568      Parser.Lex();
569      // We must now find another identifier, or error.
570      auto &Id = Lexer.getTok();
571      if (Id.getKind() != AsmToken::Identifier ||
572          Id.getLoc().getPointer() != Name.end())
573        return error("Incomplete instruction name: ", Id);
574      Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
575      Parser.Lex();
576    }
577
578    // Now construct the name as first operand.
579    Operands.push_back(std::make_unique<WebAssemblyOperand>(
580        WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
581        WebAssemblyOperand::TokOp{Name}));
582
583    // If this instruction is part of a control flow structure, ensure
584    // proper nesting.
585    bool ExpectBlockType = false;
586    bool ExpectFuncType = false;
587    std::unique_ptr<WebAssemblyOperand> FunctionTable;
588    if (Name == "block") {
589      push(Block);
590      ExpectBlockType = true;
591    } else if (Name == "loop") {
592      push(Loop);
593      ExpectBlockType = true;
594    } else if (Name == "try") {
595      push(Try);
596      ExpectBlockType = true;
597    } else if (Name == "if") {
598      push(If);
599      ExpectBlockType = true;
600    } else if (Name == "else") {
601      if (popAndPushWithSameSignature(Name, If, Else))
602        return true;
603    } else if (Name == "catch") {
604      if (popAndPushWithSameSignature(Name, Try, Try))
605        return true;
606    } else if (Name == "catch_all") {
607      if (popAndPushWithSameSignature(Name, Try, CatchAll))
608        return true;
609    } else if (Name == "end_if") {
610      if (pop(Name, If, Else))
611        return true;
612    } else if (Name == "end_try") {
613      if (pop(Name, Try, CatchAll))
614        return true;
615    } else if (Name == "delegate") {
616      if (pop(Name, Try))
617        return true;
618    } else if (Name == "end_loop") {
619      if (pop(Name, Loop))
620        return true;
621    } else if (Name == "end_block") {
622      if (pop(Name, Block))
623        return true;
624    } else if (Name == "end_function") {
625      ensureLocals(getStreamer());
626      CurrentState = EndFunction;
627      if (pop(Name, Function) || ensureEmptyNestingStack())
628        return true;
629    } else if (Name == "call_indirect" || Name == "return_call_indirect") {
630      // These instructions have differing operand orders in the text format vs
631      // the binary formats.  The MC instructions follow the binary format, so
632      // here we stash away the operand and append it later.
633      if (parseFunctionTableOperand(&FunctionTable))
634        return true;
635      ExpectFuncType = true;
636    }
637
638    if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
639      // This has a special TYPEINDEX operand which in text we
640      // represent as a signature, such that we can re-build this signature,
641      // attach it to an anonymous symbol, which is what WasmObjectWriter
642      // expects to be able to recreate the actual unique-ified type indices.
643      auto Loc = Parser.getTok();
644      auto Signature = std::make_unique<wasm::WasmSignature>();
645      if (parseSignature(Signature.get()))
646        return true;
647      // Got signature as block type, don't need more
648      TC.setLastSig(*Signature.get());
649      if (ExpectBlockType)
650        NestingStack.back().Sig = *Signature.get();
651      ExpectBlockType = false;
652      auto &Ctx = getContext();
653      // The "true" here will cause this to be a nameless symbol.
654      MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
655      auto *WasmSym = cast<MCSymbolWasm>(Sym);
656      WasmSym->setSignature(Signature.get());
657      addSignature(std::move(Signature));
658      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
659      const MCExpr *Expr = MCSymbolRefExpr::create(
660          WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
661      Operands.push_back(std::make_unique<WebAssemblyOperand>(
662          WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(),
663          WebAssemblyOperand::SymOp{Expr}));
664    }
665
666    while (Lexer.isNot(AsmToken::EndOfStatement)) {
667      auto &Tok = Lexer.getTok();
668      switch (Tok.getKind()) {
669      case AsmToken::Identifier: {
670        if (!parseSpecialFloatMaybe(false, Operands))
671          break;
672        auto &Id = Lexer.getTok();
673        if (ExpectBlockType) {
674          // Assume this identifier is a block_type.
675          auto BT = WebAssembly::parseBlockType(Id.getString());
676          if (BT == WebAssembly::BlockType::Invalid)
677            return error("Unknown block type: ", Id);
678          addBlockTypeOperand(Operands, NameLoc, BT);
679          Parser.Lex();
680        } else {
681          // Assume this identifier is a label.
682          const MCExpr *Val;
683          SMLoc Start = Id.getLoc();
684          SMLoc End;
685          if (Parser.parseExpression(Val, End))
686            return error("Cannot parse symbol: ", Lexer.getTok());
687          Operands.push_back(std::make_unique<WebAssemblyOperand>(
688              WebAssemblyOperand::Symbol, Start, End,
689              WebAssemblyOperand::SymOp{Val}));
690          if (checkForP2AlignIfLoadStore(Operands, Name))
691            return true;
692        }
693        break;
694      }
695      case AsmToken::Minus:
696        Parser.Lex();
697        if (Lexer.is(AsmToken::Integer)) {
698          parseSingleInteger(true, Operands);
699          if (checkForP2AlignIfLoadStore(Operands, Name))
700            return true;
701        } else if (Lexer.is(AsmToken::Real)) {
702          if (parseSingleFloat(true, Operands))
703            return true;
704        } else if (!parseSpecialFloatMaybe(true, Operands)) {
705        } else {
706          return error("Expected numeric constant instead got: ",
707                       Lexer.getTok());
708        }
709        break;
710      case AsmToken::Integer:
711        parseSingleInteger(false, Operands);
712        if (checkForP2AlignIfLoadStore(Operands, Name))
713          return true;
714        break;
715      case AsmToken::Real: {
716        if (parseSingleFloat(false, Operands))
717          return true;
718        break;
719      }
720      case AsmToken::LCurly: {
721        Parser.Lex();
722        auto Op = std::make_unique<WebAssemblyOperand>(
723            WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
724        if (!Lexer.is(AsmToken::RCurly))
725          for (;;) {
726            Op->BrL.List.push_back(Lexer.getTok().getIntVal());
727            expect(AsmToken::Integer, "integer");
728            if (!isNext(AsmToken::Comma))
729              break;
730          }
731        expect(AsmToken::RCurly, "}");
732        Operands.push_back(std::move(Op));
733        break;
734      }
735      default:
736        return error("Unexpected token in operand: ", Tok);
737      }
738      if (Lexer.isNot(AsmToken::EndOfStatement)) {
739        if (expect(AsmToken::Comma, ","))
740          return true;
741      }
742    }
743    if (ExpectBlockType && Operands.size() == 1) {
744      // Support blocks with no operands as default to void.
745      addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
746    }
747    if (FunctionTable)
748      Operands.push_back(std::move(FunctionTable));
749    Parser.Lex();
750    return false;
751  }
752
753  bool parseSignature(wasm::WasmSignature *Signature) {
754    if (expect(AsmToken::LParen, "("))
755      return true;
756    if (parseRegTypeList(Signature->Params))
757      return true;
758    if (expect(AsmToken::RParen, ")"))
759      return true;
760    if (expect(AsmToken::MinusGreater, "->"))
761      return true;
762    if (expect(AsmToken::LParen, "("))
763      return true;
764    if (parseRegTypeList(Signature->Returns))
765      return true;
766    if (expect(AsmToken::RParen, ")"))
767      return true;
768    return false;
769  }
770
771  bool CheckDataSection() {
772    if (CurrentState != DataSection) {
773      auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
774      if (WS && WS->getKind().isText())
775        return error("data directive must occur in a data segment: ",
776                     Lexer.getTok());
777    }
778    CurrentState = DataSection;
779    return false;
780  }
781
782  // This function processes wasm-specific directives streamed to
783  // WebAssemblyTargetStreamer, all others go to the generic parser
784  // (see WasmAsmParser).
785  ParseStatus parseDirective(AsmToken DirectiveID) override {
786    assert(DirectiveID.getKind() == AsmToken::Identifier);
787    auto &Out = getStreamer();
788    auto &TOut =
789        reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
790    auto &Ctx = Out.getContext();
791
792    if (DirectiveID.getString() == ".globaltype") {
793      auto SymName = expectIdent();
794      if (SymName.empty())
795        return ParseStatus::Failure;
796      if (expect(AsmToken::Comma, ","))
797        return ParseStatus::Failure;
798      auto TypeTok = Lexer.getTok();
799      auto TypeName = expectIdent();
800      if (TypeName.empty())
801        return ParseStatus::Failure;
802      auto Type = WebAssembly::parseType(TypeName);
803      if (!Type)
804        return error("Unknown type in .globaltype directive: ", TypeTok);
805      // Optional mutable modifier. Default to mutable for historical reasons.
806      // Ideally we would have gone with immutable as the default and used `mut`
807      // as the modifier to match the `.wat` format.
808      bool Mutable = true;
809      if (isNext(AsmToken::Comma)) {
810        TypeTok = Lexer.getTok();
811        auto Id = expectIdent();
812        if (Id.empty())
813          return ParseStatus::Failure;
814        if (Id == "immutable")
815          Mutable = false;
816        else
817          // Should we also allow `mutable` and `mut` here for clarity?
818          return error("Unknown type in .globaltype modifier: ", TypeTok);
819      }
820      // Now set this symbol with the correct type.
821      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
822      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
823      WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable});
824      // And emit the directive again.
825      TOut.emitGlobalType(WasmSym);
826      return expect(AsmToken::EndOfStatement, "EOL");
827    }
828
829    if (DirectiveID.getString() == ".tabletype") {
830      // .tabletype SYM, ELEMTYPE[, MINSIZE[, MAXSIZE]]
831      auto SymName = expectIdent();
832      if (SymName.empty())
833        return ParseStatus::Failure;
834      if (expect(AsmToken::Comma, ","))
835        return ParseStatus::Failure;
836
837      auto ElemTypeTok = Lexer.getTok();
838      auto ElemTypeName = expectIdent();
839      if (ElemTypeName.empty())
840        return ParseStatus::Failure;
841      std::optional<wasm::ValType> ElemType =
842          WebAssembly::parseType(ElemTypeName);
843      if (!ElemType)
844        return error("Unknown type in .tabletype directive: ", ElemTypeTok);
845
846      wasm::WasmLimits Limits = DefaultLimits();
847      if (isNext(AsmToken::Comma) && parseLimits(&Limits))
848        return ParseStatus::Failure;
849
850      // Now that we have the name and table type, we can actually create the
851      // symbol
852      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
853      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
854      wasm::WasmTableType Type = {*ElemType, Limits};
855      WasmSym->setTableType(Type);
856      TOut.emitTableType(WasmSym);
857      return expect(AsmToken::EndOfStatement, "EOL");
858    }
859
860    if (DirectiveID.getString() == ".functype") {
861      // This code has to send things to the streamer similar to
862      // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
863      // TODO: would be good to factor this into a common function, but the
864      // assembler and backend really don't share any common code, and this code
865      // parses the locals separately.
866      auto SymName = expectIdent();
867      if (SymName.empty())
868        return ParseStatus::Failure;
869      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
870      if (WasmSym->isDefined()) {
871        // We push 'Function' either when a label is parsed or a .functype
872        // directive is parsed. The reason it is not easy to do this uniformly
873        // in a single place is,
874        // 1. We can't do this at label parsing time only because there are
875        //    cases we don't have .functype directive before a function label,
876        //    in which case we don't know if the label is a function at the time
877        //    of parsing.
878        // 2. We can't do this at .functype parsing time only because we want to
879        //    detect a function started with a label and not ended correctly
880        //    without encountering a .functype directive after the label.
881        if (CurrentState != FunctionLabel) {
882          // This .functype indicates a start of a function.
883          if (ensureEmptyNestingStack())
884            return ParseStatus::Failure;
885          push(Function);
886        }
887        CurrentState = FunctionStart;
888        LastFunctionLabel = WasmSym;
889      }
890      auto Signature = std::make_unique<wasm::WasmSignature>();
891      if (parseSignature(Signature.get()))
892        return ParseStatus::Failure;
893      TC.funcDecl(*Signature);
894      WasmSym->setSignature(Signature.get());
895      addSignature(std::move(Signature));
896      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
897      TOut.emitFunctionType(WasmSym);
898      // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
899      return expect(AsmToken::EndOfStatement, "EOL");
900    }
901
902    if (DirectiveID.getString() == ".export_name") {
903      auto SymName = expectIdent();
904      if (SymName.empty())
905        return ParseStatus::Failure;
906      if (expect(AsmToken::Comma, ","))
907        return ParseStatus::Failure;
908      auto ExportName = expectIdent();
909      if (ExportName.empty())
910        return ParseStatus::Failure;
911      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
912      WasmSym->setExportName(storeName(ExportName));
913      TOut.emitExportName(WasmSym, ExportName);
914      return expect(AsmToken::EndOfStatement, "EOL");
915    }
916
917    if (DirectiveID.getString() == ".import_module") {
918      auto SymName = expectIdent();
919      if (SymName.empty())
920        return ParseStatus::Failure;
921      if (expect(AsmToken::Comma, ","))
922        return ParseStatus::Failure;
923      auto ImportModule = expectIdent();
924      if (ImportModule.empty())
925        return ParseStatus::Failure;
926      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
927      WasmSym->setImportModule(storeName(ImportModule));
928      TOut.emitImportModule(WasmSym, ImportModule);
929      return expect(AsmToken::EndOfStatement, "EOL");
930    }
931
932    if (DirectiveID.getString() == ".import_name") {
933      auto SymName = expectIdent();
934      if (SymName.empty())
935        return ParseStatus::Failure;
936      if (expect(AsmToken::Comma, ","))
937        return ParseStatus::Failure;
938      auto ImportName = expectIdent();
939      if (ImportName.empty())
940        return ParseStatus::Failure;
941      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
942      WasmSym->setImportName(storeName(ImportName));
943      TOut.emitImportName(WasmSym, ImportName);
944      return expect(AsmToken::EndOfStatement, "EOL");
945    }
946
947    if (DirectiveID.getString() == ".tagtype") {
948      auto SymName = expectIdent();
949      if (SymName.empty())
950        return ParseStatus::Failure;
951      auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
952      auto Signature = std::make_unique<wasm::WasmSignature>();
953      if (parseRegTypeList(Signature->Params))
954        return ParseStatus::Failure;
955      WasmSym->setSignature(Signature.get());
956      addSignature(std::move(Signature));
957      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
958      TOut.emitTagType(WasmSym);
959      // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
960      return expect(AsmToken::EndOfStatement, "EOL");
961    }
962
963    if (DirectiveID.getString() == ".local") {
964      if (CurrentState != FunctionStart)
965        return error(".local directive should follow the start of a function: ",
966                     Lexer.getTok());
967      SmallVector<wasm::ValType, 4> Locals;
968      if (parseRegTypeList(Locals))
969        return ParseStatus::Failure;
970      TC.localDecl(Locals);
971      TOut.emitLocal(Locals);
972      CurrentState = FunctionLocals;
973      return expect(AsmToken::EndOfStatement, "EOL");
974    }
975
976    if (DirectiveID.getString() == ".int8" ||
977        DirectiveID.getString() == ".int16" ||
978        DirectiveID.getString() == ".int32" ||
979        DirectiveID.getString() == ".int64") {
980      if (CheckDataSection())
981        return ParseStatus::Failure;
982      const MCExpr *Val;
983      SMLoc End;
984      if (Parser.parseExpression(Val, End))
985        return error("Cannot parse .int expression: ", Lexer.getTok());
986      size_t NumBits = 0;
987      DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
988      Out.emitValue(Val, NumBits / 8, End);
989      return expect(AsmToken::EndOfStatement, "EOL");
990    }
991
992    if (DirectiveID.getString() == ".asciz") {
993      if (CheckDataSection())
994        return ParseStatus::Failure;
995      std::string S;
996      if (Parser.parseEscapedString(S))
997        return error("Cannot parse string constant: ", Lexer.getTok());
998      Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
999      return expect(AsmToken::EndOfStatement, "EOL");
1000    }
1001
1002    return ParseStatus::NoMatch; // We didn't process this directive.
1003  }
1004
  // Called either when the first instruction is parsed or the function ends.
1006  void ensureLocals(MCStreamer &Out) {
1007    if (CurrentState == FunctionStart) {
1008      // We haven't seen a .local directive yet. The streamer requires locals to
1009      // be encoded as a prelude to the instructions, so emit an empty list of
1010      // locals here.
1011      auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
1012          *Out.getTargetStreamer());
1013      TOut.emitLocal(SmallVector<wasm::ValType, 0>());
1014      CurrentState = FunctionLocals;
1015    }
1016  }
1017
1018  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
1019                               OperandVector &Operands, MCStreamer &Out,
1020                               uint64_t &ErrorInfo,
1021                               bool MatchingInlineAsm) override {
1022    MCInst Inst;
1023    Inst.setLoc(IDLoc);
1024    FeatureBitset MissingFeatures;
1025    unsigned MatchResult = MatchInstructionImpl(
1026        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
1027    switch (MatchResult) {
1028    case Match_Success: {
1029      ensureLocals(Out);
1030      // Fix unknown p2align operands.
1031      auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
1032      if (Align != -1U) {
1033        auto &Op0 = Inst.getOperand(0);
1034        if (Op0.getImm() == -1)
1035          Op0.setImm(Align);
1036      }
1037      if (is64) {
1038        // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
1039        // an offset64 arg instead of offset32, but to the assembler matcher
1040        // they're both immediates so don't get selected for.
1041        auto Opc64 = WebAssembly::getWasm64Opcode(
1042            static_cast<uint16_t>(Inst.getOpcode()));
1043        if (Opc64 >= 0) {
1044          Inst.setOpcode(Opc64);
1045        }
1046      }
1047      if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst, Operands))
1048        return true;
1049      Out.emitInstruction(Inst, getSTI());
1050      if (CurrentState == EndFunction) {
1051        onEndOfFunction(IDLoc);
1052      } else {
1053        CurrentState = Instructions;
1054      }
1055      return false;
1056    }
1057    case Match_MissingFeature: {
1058      assert(MissingFeatures.count() > 0 && "Expected missing features");
1059      SmallString<128> Message;
1060      raw_svector_ostream OS(Message);
1061      OS << "instruction requires:";
1062      for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
1063        if (MissingFeatures.test(i))
1064          OS << ' ' << getSubtargetFeatureName(i);
1065      return Parser.Error(IDLoc, Message);
1066    }
1067    case Match_MnemonicFail:
1068      return Parser.Error(IDLoc, "invalid instruction");
1069    case Match_NearMisses:
1070      return Parser.Error(IDLoc, "ambiguous instruction");
1071    case Match_InvalidTiedOperand:
1072    case Match_InvalidOperand: {
1073      SMLoc ErrorLoc = IDLoc;
1074      if (ErrorInfo != ~0ULL) {
1075        if (ErrorInfo >= Operands.size())
1076          return Parser.Error(IDLoc, "too few operands for instruction");
1077        ErrorLoc = Operands[ErrorInfo]->getStartLoc();
1078        if (ErrorLoc == SMLoc())
1079          ErrorLoc = IDLoc;
1080      }
1081      return Parser.Error(ErrorLoc, "invalid operand for instruction");
1082    }
1083    }
1084    llvm_unreachable("Implement any new match types added!");
1085  }
1086
1087  void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override {
1088    // Code below only applies to labels in text sections.
1089    auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
1090    if (!CWS || !CWS->getKind().isText())
1091      return;
1092
1093    auto WasmSym = cast<MCSymbolWasm>(Symbol);
1094    // Unlike other targets, we don't allow data in text sections (labels
1095    // declared with .type @object).
1096    if (WasmSym->getType() == wasm::WASM_SYMBOL_TYPE_DATA) {
1097      Parser.Error(IDLoc,
1098                   "Wasm doesn\'t support data symbols in text sections");
1099      return;
1100    }
1101
1102    // Start a new section for the next function automatically, since our
1103    // object writer expects each function to have its own section. This way
1104    // The user can't forget this "convention".
1105    auto SymName = Symbol->getName();
1106    if (SymName.starts_with(".L"))
1107      return; // Local Symbol.
1108
1109    // TODO: If the user explicitly creates a new function section, we ignore
1110    // its name when we create this one. It would be nice to honor their
1111    // choice, while still ensuring that we create one if they forget.
1112    // (that requires coordination with WasmAsmParser::parseSectionDirective)
1113    auto SecName = ".text." + SymName;
1114
1115    auto *Group = CWS->getGroup();
1116    // If the current section is a COMDAT, also set the flag on the symbol.
1117    // TODO: Currently the only place that the symbols' comdat flag matters is
1118    // for importing comdat functions. But there's no way to specify that in
1119    // assembly currently.
1120    if (Group)
1121      WasmSym->setComdat(true);
1122    auto *WS =
1123        getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group,
1124                                    MCContext::GenericSectionID, nullptr);
1125    getStreamer().switchSection(WS);
1126    // Also generate DWARF for this section if requested.
1127    if (getContext().getGenDwarfForAssembly())
1128      getContext().addGenDwarfSection(WS);
1129
1130    if (WasmSym->isFunction()) {
1131      // We give the location of the label (IDLoc) here, because otherwise the
1132      // lexer's next location will be used, which can be confusing. For
1133      // example:
1134      //
1135      // test0: ; This function does not end properly
1136      //   ...
1137      //
1138      // test1: ; We would like to point to this line for error
1139      //   ...  . Not this line, which can contain any instruction
1140      ensureEmptyNestingStack(IDLoc);
1141      CurrentState = FunctionLabel;
1142      LastFunctionLabel = Symbol;
1143      push(Function);
1144    }
1145  }
1146
1147  void onEndOfFunction(SMLoc ErrorLoc) {
1148    if (!SkipTypeCheck)
1149      TC.endOfFunction(ErrorLoc);
1150    // Reset the type checker state.
1151    TC.Clear();
1152  }
1153
1154  void onEndOfFile() override { ensureEmptyNestingStack(); }
1155};
1156} // end anonymous namespace
1157
1158// Force static initialization.
1159extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
1160  RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
1161  RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
1162}
1163
1164#define GET_REGISTER_MATCHER
1165#define GET_SUBTARGET_FEATURE_NAME
1166#define GET_MATCHER_IMPLEMENTATION
1167#include "WebAssemblyGenAsmMatcher.inc"
1168
1169StringRef GetMnemonic(unsigned Opc) {
1170  // FIXME: linear search!
1171  for (auto &ME : MatchTable0) {
1172    if (ME.Opcode == Opc) {
1173      return ME.getMnemonic();
1174    }
1175  }
1176  assert(false && "mnemonic not found");
1177  return StringRef();
1178}
1179