1327952Sdim//===- Parser.cpp - Matcher expression parser -----------------------------===//
2259701Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6259701Sdim//
7259701Sdim//===----------------------------------------------------------------------===//
8259701Sdim///
9259701Sdim/// \file
10341825Sdim/// Recursive parser implementation for the matcher expression grammar.
11259701Sdim///
12259701Sdim//===----------------------------------------------------------------------===//
13259701Sdim
14259701Sdim#include "clang/ASTMatchers/Dynamic/Parser.h"
15327952Sdim#include "clang/ASTMatchers/ASTMatchersInternal.h"
16327952Sdim#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17259701Sdim#include "clang/ASTMatchers/Dynamic/Registry.h"
18259701Sdim#include "clang/Basic/CharInfo.h"
19276479Sdim#include "llvm/ADT/Optional.h"
20327952Sdim#include "llvm/ADT/StringRef.h"
21327952Sdim#include "llvm/Support/ErrorHandling.h"
22280031Sdim#include "llvm/Support/ManagedStatic.h"
23327952Sdim#include <algorithm>
24327952Sdim#include <cassert>
25327952Sdim#include <cerrno>
26327952Sdim#include <cstddef>
27327952Sdim#include <cstdlib>
28276479Sdim#include <string>
29327952Sdim#include <utility>
30276479Sdim#include <vector>
31259701Sdim
32259701Sdimnamespace clang {
33259701Sdimnamespace ast_matchers {
34259701Sdimnamespace dynamic {
35259701Sdim
36341825Sdim/// Simple structure to hold information for one token from the parser.
37259701Sdimstruct Parser::TokenInfo {
38341825Sdim  /// Different possible tokens.
39259701Sdim  enum TokenKind {
40276479Sdim    TK_Eof,
41360784Sdim    TK_NewLine,
42276479Sdim    TK_OpenParen,
43276479Sdim    TK_CloseParen,
44276479Sdim    TK_Comma,
45276479Sdim    TK_Period,
46276479Sdim    TK_Literal,
47276479Sdim    TK_Ident,
48276479Sdim    TK_InvalidChar,
49276479Sdim    TK_Error,
50276479Sdim    TK_CodeCompletion
51259701Sdim  };
52259701Sdim
53341825Sdim  /// Some known identifiers.
54259701Sdim  static const char* const ID_Bind;
55259701Sdim
56327952Sdim  TokenInfo() = default;
57259701Sdim
58259701Sdim  StringRef Text;
59327952Sdim  TokenKind Kind = TK_Eof;
60259701Sdim  SourceRange Range;
61259701Sdim  VariantValue Value;
62259701Sdim};
63259701Sdim
64259701Sdimconst char* const Parser::TokenInfo::ID_Bind = "bind";
65259701Sdim
66341825Sdim/// Simple tokenizer for the parser.
67259701Sdimclass Parser::CodeTokenizer {
68259701Sdimpublic:
69360784Sdim  explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
70327952Sdim      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
71259701Sdim    NextToken = getNextToken();
72259701Sdim  }
73259701Sdim
74360784Sdim  CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
75276479Sdim                unsigned CodeCompletionOffset)
76327952Sdim      : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
77276479Sdim        CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
78276479Sdim    NextToken = getNextToken();
79276479Sdim  }
80276479Sdim
81341825Sdim  /// Returns but doesn't consume the next token.
82259701Sdim  const TokenInfo &peekNextToken() const { return NextToken; }
83259701Sdim
84341825Sdim  /// Consumes and returns the next token.
85259701Sdim  TokenInfo consumeNextToken() {
86259701Sdim    TokenInfo ThisToken = NextToken;
87259701Sdim    NextToken = getNextToken();
88259701Sdim    return ThisToken;
89259701Sdim  }
90259701Sdim
91360784Sdim  TokenInfo SkipNewlines() {
92360784Sdim    while (NextToken.Kind == TokenInfo::TK_NewLine)
93360784Sdim      NextToken = getNextToken();
94360784Sdim    return NextToken;
95360784Sdim  }
96360784Sdim
97360784Sdim  TokenInfo consumeNextTokenIgnoreNewlines() {
98360784Sdim    SkipNewlines();
99360784Sdim    if (NextToken.Kind == TokenInfo::TK_Eof)
100360784Sdim      return NextToken;
101360784Sdim    return consumeNextToken();
102360784Sdim  }
103360784Sdim
104259701Sdim  TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
105259701Sdim
106259701Sdimprivate:
107259701Sdim  TokenInfo getNextToken() {
108259701Sdim    consumeWhitespace();
109259701Sdim    TokenInfo Result;
110259701Sdim    Result.Range.Start = currentLocation();
111259701Sdim
112276479Sdim    if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
113276479Sdim      Result.Kind = TokenInfo::TK_CodeCompletion;
114276479Sdim      Result.Text = StringRef(CodeCompletionLocation, 0);
115276479Sdim      CodeCompletionLocation = nullptr;
116276479Sdim      return Result;
117276479Sdim    }
118276479Sdim
119259701Sdim    if (Code.empty()) {
120259701Sdim      Result.Kind = TokenInfo::TK_Eof;
121259701Sdim      Result.Text = "";
122259701Sdim      return Result;
123259701Sdim    }
124259701Sdim
125259701Sdim    switch (Code[0]) {
126344779Sdim    case '#':
127360784Sdim      Code = Code.drop_until([](char c) { return c == '\n'; });
128360784Sdim      return getNextToken();
129259701Sdim    case ',':
130259701Sdim      Result.Kind = TokenInfo::TK_Comma;
131259701Sdim      Result.Text = Code.substr(0, 1);
132259701Sdim      Code = Code.drop_front();
133259701Sdim      break;
134259701Sdim    case '.':
135259701Sdim      Result.Kind = TokenInfo::TK_Period;
136259701Sdim      Result.Text = Code.substr(0, 1);
137259701Sdim      Code = Code.drop_front();
138259701Sdim      break;
139360784Sdim    case '\n':
140360784Sdim      ++Line;
141360784Sdim      StartOfLine = Code.drop_front();
142360784Sdim      Result.Kind = TokenInfo::TK_NewLine;
143360784Sdim      Result.Text = Code.substr(0, 1);
144360784Sdim      Code = Code.drop_front();
145360784Sdim      break;
146259701Sdim    case '(':
147259701Sdim      Result.Kind = TokenInfo::TK_OpenParen;
148259701Sdim      Result.Text = Code.substr(0, 1);
149259701Sdim      Code = Code.drop_front();
150259701Sdim      break;
151259701Sdim    case ')':
152259701Sdim      Result.Kind = TokenInfo::TK_CloseParen;
153259701Sdim      Result.Text = Code.substr(0, 1);
154259701Sdim      Code = Code.drop_front();
155259701Sdim      break;
156259701Sdim
157259701Sdim    case '"':
158259701Sdim    case '\'':
159259701Sdim      // Parse a string literal.
160259701Sdim      consumeStringLiteral(&Result);
161259701Sdim      break;
162259701Sdim
163259701Sdim    case '0': case '1': case '2': case '3': case '4':
164259701Sdim    case '5': case '6': case '7': case '8': case '9':
165321369Sdim      // Parse an unsigned and float literal.
166321369Sdim      consumeNumberLiteral(&Result);
167259701Sdim      break;
168259701Sdim
169259701Sdim    default:
170259701Sdim      if (isAlphanumeric(Code[0])) {
171259701Sdim        // Parse an identifier
172259701Sdim        size_t TokenLength = 1;
173327952Sdim        while (true) {
174276479Sdim          // A code completion location in/immediately after an identifier will
175276479Sdim          // cause the portion of the identifier before the code completion
176276479Sdim          // location to become a code completion token.
177276479Sdim          if (CodeCompletionLocation == Code.data() + TokenLength) {
178276479Sdim            CodeCompletionLocation = nullptr;
179276479Sdim            Result.Kind = TokenInfo::TK_CodeCompletion;
180276479Sdim            Result.Text = Code.substr(0, TokenLength);
181276479Sdim            Code = Code.drop_front(TokenLength);
182276479Sdim            return Result;
183276479Sdim          }
184276479Sdim          if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
185276479Sdim            break;
186259701Sdim          ++TokenLength;
187276479Sdim        }
188321369Sdim        if (TokenLength == 4 && Code.startswith("true")) {
189321369Sdim          Result.Kind = TokenInfo::TK_Literal;
190321369Sdim          Result.Value = true;
191321369Sdim        } else if (TokenLength == 5 && Code.startswith("false")) {
192321369Sdim          Result.Kind = TokenInfo::TK_Literal;
193321369Sdim          Result.Value = false;
194321369Sdim        } else {
195321369Sdim          Result.Kind = TokenInfo::TK_Ident;
196321369Sdim          Result.Text = Code.substr(0, TokenLength);
197321369Sdim        }
198259701Sdim        Code = Code.drop_front(TokenLength);
199259701Sdim      } else {
200259701Sdim        Result.Kind = TokenInfo::TK_InvalidChar;
201259701Sdim        Result.Text = Code.substr(0, 1);
202259701Sdim        Code = Code.drop_front(1);
203259701Sdim      }
204259701Sdim      break;
205259701Sdim    }
206259701Sdim
207259701Sdim    Result.Range.End = currentLocation();
208259701Sdim    return Result;
209259701Sdim  }
210259701Sdim
211341825Sdim  /// Consume an unsigned and float literal.
212321369Sdim  void consumeNumberLiteral(TokenInfo *Result) {
213321369Sdim    bool isFloatingLiteral = false;
214259701Sdim    unsigned Length = 1;
215259701Sdim    if (Code.size() > 1) {
216259701Sdim      // Consume the 'x' or 'b' radix modifier, if present.
217259701Sdim      switch (toLowercase(Code[1])) {
218259701Sdim      case 'x': case 'b': Length = 2;
219259701Sdim      }
220259701Sdim    }
221259701Sdim    while (Length < Code.size() && isHexDigit(Code[Length]))
222259701Sdim      ++Length;
223259701Sdim
224321369Sdim    // Try to recognize a floating point literal.
225321369Sdim    while (Length < Code.size()) {
226321369Sdim      char c = Code[Length];
227321369Sdim      if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
228321369Sdim        isFloatingLiteral = true;
229321369Sdim        Length++;
230321369Sdim      } else {
231321369Sdim        break;
232321369Sdim      }
233321369Sdim    }
234321369Sdim
235259701Sdim    Result->Text = Code.substr(0, Length);
236259701Sdim    Code = Code.drop_front(Length);
237259701Sdim
238321369Sdim    if (isFloatingLiteral) {
239321369Sdim      char *end;
240321369Sdim      errno = 0;
241321369Sdim      std::string Text = Result->Text.str();
242321369Sdim      double doubleValue = strtod(Text.c_str(), &end);
243321369Sdim      if (*end == 0 && errno == 0) {
244321369Sdim        Result->Kind = TokenInfo::TK_Literal;
245321369Sdim        Result->Value = doubleValue;
246321369Sdim        return;
247321369Sdim      }
248259701Sdim    } else {
249321369Sdim      unsigned Value;
250321369Sdim      if (!Result->Text.getAsInteger(0, Value)) {
251321369Sdim        Result->Kind = TokenInfo::TK_Literal;
252321369Sdim        Result->Value = Value;
253321369Sdim        return;
254321369Sdim      }
255259701Sdim    }
256321369Sdim
257321369Sdim    SourceRange Range;
258321369Sdim    Range.Start = Result->Range.Start;
259321369Sdim    Range.End = currentLocation();
260321369Sdim    Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
261321369Sdim    Result->Kind = TokenInfo::TK_Error;
262259701Sdim  }
263259701Sdim
264341825Sdim  /// Consume a string literal.
265259701Sdim  ///
266259701Sdim  /// \c Code must be positioned at the start of the literal (the opening
267259701Sdim  /// quote). Consumed until it finds the same closing quote character.
268259701Sdim  void consumeStringLiteral(TokenInfo *Result) {
269259701Sdim    bool InEscape = false;
270259701Sdim    const char Marker = Code[0];
271259701Sdim    for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
272259701Sdim      if (InEscape) {
273259701Sdim        InEscape = false;
274259701Sdim        continue;
275259701Sdim      }
276259701Sdim      if (Code[Length] == '\\') {
277259701Sdim        InEscape = true;
278259701Sdim        continue;
279259701Sdim      }
280259701Sdim      if (Code[Length] == Marker) {
281259701Sdim        Result->Kind = TokenInfo::TK_Literal;
282259701Sdim        Result->Text = Code.substr(0, Length + 1);
283288943Sdim        Result->Value = Code.substr(1, Length - 1);
284259701Sdim        Code = Code.drop_front(Length + 1);
285259701Sdim        return;
286259701Sdim      }
287259701Sdim    }
288259701Sdim
289259701Sdim    StringRef ErrorText = Code;
290259701Sdim    Code = Code.drop_front(Code.size());
291259701Sdim    SourceRange Range;
292259701Sdim    Range.Start = Result->Range.Start;
293259701Sdim    Range.End = currentLocation();
294259701Sdim    Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
295259701Sdim    Result->Kind = TokenInfo::TK_Error;
296259701Sdim  }
297259701Sdim
298341825Sdim  /// Consume all leading whitespace from \c Code.
299259701Sdim  void consumeWhitespace() {
300360784Sdim    Code = Code.drop_while([](char c) {
301360784Sdim      // Don't trim newlines.
302360784Sdim      return StringRef(" \t\v\f\r").contains(c);
303360784Sdim    });
304259701Sdim  }
305259701Sdim
306259701Sdim  SourceLocation currentLocation() {
307259701Sdim    SourceLocation Location;
308259701Sdim    Location.Line = Line;
309259701Sdim    Location.Column = Code.data() - StartOfLine.data() + 1;
310259701Sdim    return Location;
311259701Sdim  }
312259701Sdim
313360784Sdim  StringRef &Code;
314259701Sdim  StringRef StartOfLine;
315327952Sdim  unsigned Line = 1;
316259701Sdim  Diagnostics *Error;
317259701Sdim  TokenInfo NextToken;
318327952Sdim  const char *CodeCompletionLocation = nullptr;
319259701Sdim};
320259701Sdim
321327952SdimParser::Sema::~Sema() = default;
322259701Sdim
323280031Sdimstd::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
324280031Sdim    llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
325327952Sdim  return {};
326276479Sdim}
327276479Sdim
328280031Sdimstd::vector<MatcherCompletion>
329280031SdimParser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
330327952Sdim  return {};
331280031Sdim}
332280031Sdim
333276479Sdimstruct Parser::ScopedContextEntry {
334276479Sdim  Parser *P;
335276479Sdim
336276479Sdim  ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
337276479Sdim    P->ContextStack.push_back(std::make_pair(C, 0u));
338276479Sdim  }
339276479Sdim
340276479Sdim  ~ScopedContextEntry() {
341276479Sdim    P->ContextStack.pop_back();
342276479Sdim  }
343276479Sdim
344276479Sdim  void nextArg() {
345276479Sdim    ++P->ContextStack.back().second;
346276479Sdim  }
347276479Sdim};
348276479Sdim
349341825Sdim/// Parse expressions that start with an identifier.
350276479Sdim///
351276479Sdim/// This function can parse named values and matchers.
352276479Sdim/// In case of failure it will try to determine the user's intent to give
353276479Sdim/// an appropriate error message.
354276479Sdimbool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
355276479Sdim  const TokenInfo NameToken = Tokenizer->consumeNextToken();
356276479Sdim
357276479Sdim  if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
358276479Sdim    // Parse as a named value.
359280031Sdim    if (const VariantValue NamedValue =
360280031Sdim            NamedValues ? NamedValues->lookup(NameToken.Text)
361280031Sdim                        : VariantValue()) {
362344779Sdim
363344779Sdim      if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
364344779Sdim        *Value = NamedValue;
365344779Sdim        return true;
366344779Sdim      }
367344779Sdim
368344779Sdim      std::string BindID;
369344779Sdim      if (!parseBindID(BindID))
370344779Sdim        return false;
371344779Sdim
372344779Sdim      assert(NamedValue.isMatcher());
373344779Sdim      llvm::Optional<DynTypedMatcher> Result =
374344779Sdim          NamedValue.getMatcher().getSingleMatcher();
375344779Sdim      if (Result.hasValue()) {
376344779Sdim        llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
377344779Sdim        if (Bound.hasValue()) {
378344779Sdim          *Value = VariantMatcher::SingleMatcher(*Bound);
379344779Sdim          return true;
380344779Sdim        }
381344779Sdim      }
382344779Sdim      return false;
383276479Sdim    }
384360784Sdim
385360784Sdim    if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
386360784Sdim      Error->addError(Tokenizer->peekNextToken().Range,
387360784Sdim                      Error->ET_ParserNoOpenParen)
388360784Sdim          << "NewLine";
389360784Sdim      return false;
390360784Sdim    }
391360784Sdim
392276479Sdim    // If the syntax is correct and the name is not a matcher either, report
393276479Sdim    // unknown named value.
394276479Sdim    if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
395276479Sdim         Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
396360784Sdim         Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
397276479Sdim         Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
398276479Sdim        !S->lookupMatcherCtor(NameToken.Text)) {
399276479Sdim      Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
400276479Sdim          << NameToken.Text;
401276479Sdim      return false;
402276479Sdim    }
403276479Sdim    // Otherwise, fallback to the matcher parser.
404276479Sdim  }
405276479Sdim
406360784Sdim  Tokenizer->SkipNewlines();
407360784Sdim
408276479Sdim  // Parse as a matcher expression.
409276479Sdim  return parseMatcherExpressionImpl(NameToken, Value);
410276479Sdim}
411276479Sdim
412344779Sdimbool Parser::parseBindID(std::string &BindID) {
413344779Sdim  // Parse .bind("foo")
414344779Sdim  assert(Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period);
415344779Sdim  Tokenizer->consumeNextToken(); // consume the period.
416344779Sdim  const TokenInfo BindToken = Tokenizer->consumeNextToken();
417344779Sdim  if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
418344779Sdim    addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
419344779Sdim    return false;
420344779Sdim  }
421344779Sdim
422344779Sdim  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
423360784Sdim  const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
424360784Sdim  const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
425344779Sdim
426344779Sdim  // TODO: We could use different error codes for each/some to be more
427344779Sdim  //       explicit about the syntax error.
428344779Sdim  if (BindToken.Kind != TokenInfo::TK_Ident ||
429344779Sdim      BindToken.Text != TokenInfo::ID_Bind) {
430344779Sdim    Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
431344779Sdim    return false;
432344779Sdim  }
433344779Sdim  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
434344779Sdim    Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
435344779Sdim    return false;
436344779Sdim  }
437344779Sdim  if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
438344779Sdim    Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
439344779Sdim    return false;
440344779Sdim  }
441344779Sdim  if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
442344779Sdim    Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
443344779Sdim    return false;
444344779Sdim  }
445344779Sdim  BindID = IDToken.Value.getString();
446344779Sdim  return true;
447344779Sdim}
448344779Sdim
449341825Sdim/// Parse and validate a matcher expression.
450259701Sdim/// \return \c true on success, in which case \c Value has the matcher parsed.
451259701Sdim///   If the input is malformed, or some argument has an error, it
452259701Sdim///   returns \c false.
453276479Sdimbool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
454276479Sdim                                        VariantValue *Value) {
455259701Sdim  assert(NameToken.Kind == TokenInfo::TK_Ident);
456259701Sdim  const TokenInfo OpenToken = Tokenizer->consumeNextToken();
457259701Sdim  if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
458259701Sdim    Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
459259701Sdim        << OpenToken.Text;
460259701Sdim    return false;
461259701Sdim  }
462259701Sdim
463276479Sdim  llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
464276479Sdim
465276479Sdim  if (!Ctor) {
466276479Sdim    Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
467276479Sdim        << NameToken.Text;
468276479Sdim    // Do not return here. We need to continue to give completion suggestions.
469276479Sdim  }
470276479Sdim
471259701Sdim  std::vector<ParserValue> Args;
472259701Sdim  TokenInfo EndToken;
473276479Sdim
474360784Sdim  Tokenizer->SkipNewlines();
475360784Sdim
476276479Sdim  {
477276479Sdim    ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
478276479Sdim
479276479Sdim    while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
480276479Sdim      if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
481276479Sdim        // End of args.
482276479Sdim        EndToken = Tokenizer->consumeNextToken();
483276479Sdim        break;
484276479Sdim      }
485327952Sdim      if (!Args.empty()) {
486276479Sdim        // We must find a , token to continue.
487276479Sdim        const TokenInfo CommaToken = Tokenizer->consumeNextToken();
488276479Sdim        if (CommaToken.Kind != TokenInfo::TK_Comma) {
489276479Sdim          Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
490276479Sdim              << CommaToken.Text;
491276479Sdim          return false;
492276479Sdim        }
493276479Sdim      }
494276479Sdim
495276479Sdim      Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
496276479Sdim                               NameToken.Text, NameToken.Range,
497276479Sdim                               Args.size() + 1);
498276479Sdim      ParserValue ArgValue;
499360784Sdim      Tokenizer->SkipNewlines();
500276479Sdim      ArgValue.Text = Tokenizer->peekNextToken().Text;
501276479Sdim      ArgValue.Range = Tokenizer->peekNextToken().Range;
502276479Sdim      if (!parseExpressionImpl(&ArgValue.Value)) {
503259701Sdim        return false;
504259701Sdim      }
505276479Sdim
506360784Sdim      Tokenizer->SkipNewlines();
507276479Sdim      Args.push_back(ArgValue);
508276479Sdim      SCE.nextArg();
509259701Sdim    }
510259701Sdim  }
511259701Sdim
512259701Sdim  if (EndToken.Kind == TokenInfo::TK_Eof) {
513259701Sdim    Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
514259701Sdim    return false;
515259701Sdim  }
516259701Sdim
517259701Sdim  std::string BindID;
518259701Sdim  if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
519344779Sdim    if (!parseBindID(BindID))
520276479Sdim      return false;
521259701Sdim  }
522259701Sdim
523276479Sdim  if (!Ctor)
524276479Sdim    return false;
525276479Sdim
526259701Sdim  // Merge the start and end infos.
527259701Sdim  Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
528259701Sdim                           NameToken.Text, NameToken.Range);
529259701Sdim  SourceRange MatcherRange = NameToken.Range;
530259701Sdim  MatcherRange.End = EndToken.Range.End;
531259701Sdim  VariantMatcher Result = S->actOnMatcherExpression(
532276479Sdim      *Ctor, MatcherRange, BindID, Args, Error);
533259701Sdim  if (Result.isNull()) return false;
534259701Sdim
535259701Sdim  *Value = Result;
536259701Sdim  return true;
537259701Sdim}
538259701Sdim
539276479Sdim// If the prefix of this completion matches the completion token, add it to
540276479Sdim// Completions minus the prefix.
541280031Sdimvoid Parser::addCompletion(const TokenInfo &CompToken,
542280031Sdim                           const MatcherCompletion& Completion) {
543280031Sdim  if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
544280031Sdim      Completion.Specificity > 0) {
545280031Sdim    Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
546280031Sdim                             Completion.MatcherDecl, Completion.Specificity);
547276479Sdim  }
548276479Sdim}
549276479Sdim
550280031Sdimstd::vector<MatcherCompletion> Parser::getNamedValueCompletions(
551280031Sdim    ArrayRef<ArgKind> AcceptedTypes) {
552280031Sdim  if (!NamedValues) return std::vector<MatcherCompletion>();
553280031Sdim  std::vector<MatcherCompletion> Result;
554280031Sdim  for (const auto &Entry : *NamedValues) {
555280031Sdim    unsigned Specificity;
556280031Sdim    if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
557280031Sdim      std::string Decl =
558280031Sdim          (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
559280031Sdim      Result.emplace_back(Entry.getKey(), Decl, Specificity);
560280031Sdim    }
561280031Sdim  }
562280031Sdim  return Result;
563280031Sdim}
564280031Sdim
565276479Sdimvoid Parser::addExpressionCompletions() {
566360784Sdim  const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
567276479Sdim  assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
568276479Sdim
569276479Sdim  // We cannot complete code if there is an invalid element on the context
570276479Sdim  // stack.
571276479Sdim  for (ContextStackTy::iterator I = ContextStack.begin(),
572276479Sdim                                E = ContextStack.end();
573276479Sdim       I != E; ++I) {
574276479Sdim    if (!I->first)
575276479Sdim      return;
576276479Sdim  }
577276479Sdim
578280031Sdim  auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
579280031Sdim  for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
580280031Sdim    addCompletion(CompToken, Completion);
581276479Sdim  }
582280031Sdim
583280031Sdim  for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
584280031Sdim    addCompletion(CompToken, Completion);
585280031Sdim  }
586276479Sdim}
587276479Sdim
588341825Sdim/// Parse an <Expression>
589259701Sdimbool Parser::parseExpressionImpl(VariantValue *Value) {
590259701Sdim  switch (Tokenizer->nextTokenKind()) {
591259701Sdim  case TokenInfo::TK_Literal:
592259701Sdim    *Value = Tokenizer->consumeNextToken().Value;
593259701Sdim    return true;
594259701Sdim
595259701Sdim  case TokenInfo::TK_Ident:
596276479Sdim    return parseIdentifierPrefixImpl(Value);
597259701Sdim
598276479Sdim  case TokenInfo::TK_CodeCompletion:
599276479Sdim    addExpressionCompletions();
600276479Sdim    return false;
601276479Sdim
602259701Sdim  case TokenInfo::TK_Eof:
603259701Sdim    Error->addError(Tokenizer->consumeNextToken().Range,
604259701Sdim                    Error->ET_ParserNoCode);
605259701Sdim    return false;
606259701Sdim
607259701Sdim  case TokenInfo::TK_Error:
608259701Sdim    // This error was already reported by the tokenizer.
609259701Sdim    return false;
610360784Sdim  case TokenInfo::TK_NewLine:
611259701Sdim  case TokenInfo::TK_OpenParen:
612259701Sdim  case TokenInfo::TK_CloseParen:
613259701Sdim  case TokenInfo::TK_Comma:
614259701Sdim  case TokenInfo::TK_Period:
615259701Sdim  case TokenInfo::TK_InvalidChar:
616259701Sdim    const TokenInfo Token = Tokenizer->consumeNextToken();
617360784Sdim    Error->addError(Token.Range, Error->ET_ParserInvalidToken)
618360784Sdim        << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
619259701Sdim    return false;
620259701Sdim  }
621259701Sdim
622259701Sdim  llvm_unreachable("Unknown token kind.");
623259701Sdim}
624259701Sdim
625280031Sdimstatic llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
626280031Sdim
627259701SdimParser::Parser(CodeTokenizer *Tokenizer, Sema *S,
628280031Sdim               const NamedValueMap *NamedValues, Diagnostics *Error)
629280031Sdim    : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
630280031Sdim      NamedValues(NamedValues), Error(Error) {}
631259701Sdim
632327952SdimParser::RegistrySema::~RegistrySema() = default;
633276479Sdim
634276479Sdimllvm::Optional<MatcherCtor>
635276479SdimParser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
636276479Sdim  return Registry::lookupMatcherCtor(MatcherName);
637276479Sdim}
638276479Sdim
639276479SdimVariantMatcher Parser::RegistrySema::actOnMatcherExpression(
640296417Sdim    MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
641276479Sdim    ArrayRef<ParserValue> Args, Diagnostics *Error) {
642276479Sdim  if (BindID.empty()) {
643276479Sdim    return Registry::constructMatcher(Ctor, NameRange, Args, Error);
644276479Sdim  } else {
645276479Sdim    return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
646276479Sdim                                           Error);
647259701Sdim  }
648276479Sdim}
649259701Sdim
650280031Sdimstd::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
651280031Sdim    ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
652280031Sdim  return Registry::getAcceptedCompletionTypes(Context);
653259701Sdim}
654259701Sdim
655280031Sdimstd::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
656280031Sdim    ArrayRef<ArgKind> AcceptedTypes) {
657280031Sdim  return Registry::getMatcherCompletions(AcceptedTypes);
658280031Sdim}
659280031Sdim
660360784Sdimbool Parser::parseExpression(StringRef &Code, Sema *S,
661280031Sdim                             const NamedValueMap *NamedValues,
662259701Sdim                             VariantValue *Value, Diagnostics *Error) {
663259701Sdim  CodeTokenizer Tokenizer(Code, Error);
664280031Sdim  if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
665280031Sdim    return false;
666360784Sdim  auto NT = Tokenizer.peekNextToken();
667360784Sdim  if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
668259701Sdim    Error->addError(Tokenizer.peekNextToken().Range,
669259701Sdim                    Error->ET_ParserTrailingCode);
670259701Sdim    return false;
671259701Sdim  }
672259701Sdim  return true;
673259701Sdim}
674259701Sdim
675276479Sdimstd::vector<MatcherCompletion>
676360784SdimParser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
677280031Sdim                           const NamedValueMap *NamedValues) {
678276479Sdim  Diagnostics Error;
679276479Sdim  CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
680280031Sdim  Parser P(&Tokenizer, S, NamedValues, &Error);
681276479Sdim  VariantValue Dummy;
682276479Sdim  P.parseExpressionImpl(&Dummy);
683276479Sdim
684280031Sdim  // Sort by specificity, then by name.
685344779Sdim  llvm::sort(P.Completions,
686341825Sdim             [](const MatcherCompletion &A, const MatcherCompletion &B) {
687344779Sdim               if (A.Specificity != B.Specificity)
688344779Sdim                 return A.Specificity > B.Specificity;
689344779Sdim               return A.TypedText < B.TypedText;
690344779Sdim             });
691280031Sdim
692276479Sdim  return P.Completions;
693276479Sdim}
694276479Sdim
695259701Sdimllvm::Optional<DynTypedMatcher>
696360784SdimParser::parseMatcherExpression(StringRef &Code, Sema *S,
697280031Sdim                               const NamedValueMap *NamedValues,
698259701Sdim                               Diagnostics *Error) {
699259701Sdim  VariantValue Value;
700280031Sdim  if (!parseExpression(Code, S, NamedValues, &Value, Error))
701259701Sdim    return llvm::Optional<DynTypedMatcher>();
702259701Sdim  if (!Value.isMatcher()) {
703259701Sdim    Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
704259701Sdim    return llvm::Optional<DynTypedMatcher>();
705259701Sdim  }
706259701Sdim  llvm::Optional<DynTypedMatcher> Result =
707259701Sdim      Value.getMatcher().getSingleMatcher();
708259701Sdim  if (!Result.hasValue()) {
709259701Sdim    Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
710259701Sdim        << Value.getTypeAsString();
711259701Sdim  }
712259701Sdim  return Result;
713259701Sdim}
714259701Sdim
715327952Sdim} // namespace dynamic
716327952Sdim} // namespace ast_matchers
717327952Sdim} // namespace clang
718