CPlusPlusNameParser.cpp revision 355940
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "CPlusPlusNameParser.h"
10
11#include "clang/Basic/IdentifierTable.h"
12#include "llvm/ADT/StringMap.h"
13#include "llvm/Support/Threading.h"
14
15using namespace lldb;
16using namespace lldb_private;
17using llvm::Optional;
18using llvm::None;
19using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21namespace tok = clang::tok;
22
23Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24  m_next_token_index = 0;
25  Optional<ParsedFunction> result(None);
26
27  // Try to parse the name as function without a return type specified e.g.
28  // main(int, char*[])
29  {
30    Bookmark start_position = SetBookmark();
31    result = ParseFunctionImpl(false);
32    if (result && !HasMoreTokens())
33      return result;
34  }
35
36  // Try to parse the name as function with function pointer return type e.g.
37  // void (*get_func(const char*))()
38  result = ParseFuncPtr(true);
39  if (result)
40    return result;
41
42  // Finally try to parse the name as a function with non-function return type
43  // e.g. int main(int, char*[])
44  result = ParseFunctionImpl(true);
45  if (HasMoreTokens())
46    return None;
47  return result;
48}
49
50Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51  m_next_token_index = 0;
52  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53  if (!name_ranges)
54    return None;
55  if (HasMoreTokens())
56    return None;
57  ParsedName result;
58  result.basename = GetTextForRange(name_ranges.getValue().basename_range);
59  result.context = GetTextForRange(name_ranges.getValue().context_range);
60  return result;
61}
62
63bool CPlusPlusNameParser::HasMoreTokens() {
64  return m_next_token_index < m_tokens.size();
65}
66
67void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68
69void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70
71bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72  if (!HasMoreTokens())
73    return false;
74
75  if (!Peek().is(kind))
76    return false;
77
78  Advance();
79  return true;
80}
81
82template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83  if (!HasMoreTokens())
84    return false;
85
86  if (!Peek().isOneOf(kinds...))
87    return false;
88
89  Advance();
90  return true;
91}
92
93CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94  return Bookmark(m_next_token_index);
95}
96
97size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98
99clang::Token &CPlusPlusNameParser::Peek() {
100  assert(HasMoreTokens());
101  return m_tokens[m_next_token_index];
102}
103
104Optional<ParsedFunction>
105CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106  Bookmark start_position = SetBookmark();
107  if (expect_return_type) {
108    // Consume return type if it's expected.
109    if (!ConsumeTypename())
110      return None;
111  }
112
113  auto maybe_name = ParseFullNameImpl();
114  if (!maybe_name) {
115    return None;
116  }
117
118  size_t argument_start = GetCurrentPosition();
119  if (!ConsumeArguments()) {
120    return None;
121  }
122
123  size_t qualifiers_start = GetCurrentPosition();
124  SkipFunctionQualifiers();
125  size_t end_position = GetCurrentPosition();
126
127  ParsedFunction result;
128  result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
129  result.name.context = GetTextForRange(maybe_name.getValue().context_range);
130  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
131  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
132  start_position.Remove();
133  return result;
134}
135
136Optional<ParsedFunction>
137CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
138  Bookmark start_position = SetBookmark();
139  if (expect_return_type) {
140    // Consume return type.
141    if (!ConsumeTypename())
142      return None;
143  }
144
145  if (!ConsumeToken(tok::l_paren))
146    return None;
147  if (!ConsumePtrsAndRefs())
148    return None;
149
150  {
151    Bookmark before_inner_function_pos = SetBookmark();
152    auto maybe_inner_function_name = ParseFunctionImpl(false);
153    if (maybe_inner_function_name)
154      if (ConsumeToken(tok::r_paren))
155        if (ConsumeArguments()) {
156          SkipFunctionQualifiers();
157          start_position.Remove();
158          before_inner_function_pos.Remove();
159          return maybe_inner_function_name;
160        }
161  }
162
163  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
164  if (maybe_inner_function_ptr_name)
165    if (ConsumeToken(tok::r_paren))
166      if (ConsumeArguments()) {
167        SkipFunctionQualifiers();
168        start_position.Remove();
169        return maybe_inner_function_ptr_name;
170      }
171  return None;
172}
173
174bool CPlusPlusNameParser::ConsumeArguments() {
175  return ConsumeBrackets(tok::l_paren, tok::r_paren);
176}
177
178bool CPlusPlusNameParser::ConsumeTemplateArgs() {
179  Bookmark start_position = SetBookmark();
180  if (!HasMoreTokens() || Peek().getKind() != tok::less)
181    return false;
182  Advance();
183
184  // Consuming template arguments is a bit trickier than consuming function
185  // arguments, because '<' '>' brackets are not always trivially balanced. In
186  // some rare cases tokens '<' and '>' can appear inside template arguments as
187  // arithmetic or shift operators not as template brackets. Examples:
188  // std::enable_if<(10u)<(64), bool>
189  //           f<A<operator<(X,Y)::Subclass>>
190  // Good thing that compiler makes sure that really ambiguous cases of '>'
191  // usage should be enclosed within '()' brackets.
192  int template_counter = 1;
193  bool can_open_template = false;
194  while (HasMoreTokens() && template_counter > 0) {
195    tok::TokenKind kind = Peek().getKind();
196    switch (kind) {
197    case tok::greatergreater:
198      template_counter -= 2;
199      can_open_template = false;
200      Advance();
201      break;
202    case tok::greater:
203      --template_counter;
204      can_open_template = false;
205      Advance();
206      break;
207    case tok::less:
208      // '<' is an attempt to open a subteamplte
209      // check if parser is at the point where it's actually possible,
210      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
211      // need to do the same for '>' because compiler actually makes sure that
212      // '>' always surrounded by brackets to avoid ambiguity.
213      if (can_open_template)
214        ++template_counter;
215      can_open_template = false;
216      Advance();
217      break;
218    case tok::kw_operator: // C++ operator overloading.
219      if (!ConsumeOperator())
220        return false;
221      can_open_template = true;
222      break;
223    case tok::raw_identifier:
224      can_open_template = true;
225      Advance();
226      break;
227    case tok::l_square:
228      if (!ConsumeBrackets(tok::l_square, tok::r_square))
229        return false;
230      can_open_template = false;
231      break;
232    case tok::l_paren:
233      if (!ConsumeArguments())
234        return false;
235      can_open_template = false;
236      break;
237    default:
238      can_open_template = false;
239      Advance();
240      break;
241    }
242  }
243
244  if (template_counter != 0) {
245    return false;
246  }
247  start_position.Remove();
248  return true;
249}
250
251bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
252  Bookmark start_position = SetBookmark();
253  if (!ConsumeToken(tok::l_paren)) {
254    return false;
255  }
256  constexpr llvm::StringLiteral g_anonymous("anonymous");
257  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
258      Peek().getRawIdentifier() == g_anonymous) {
259    Advance();
260  } else {
261    return false;
262  }
263
264  if (!ConsumeToken(tok::kw_namespace)) {
265    return false;
266  }
267
268  if (!ConsumeToken(tok::r_paren)) {
269    return false;
270  }
271  start_position.Remove();
272  return true;
273}
274
275bool CPlusPlusNameParser::ConsumeLambda() {
276  Bookmark start_position = SetBookmark();
277  if (!ConsumeToken(tok::l_brace)) {
278    return false;
279  }
280  constexpr llvm::StringLiteral g_lambda("lambda");
281  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
282      Peek().getRawIdentifier() == g_lambda) {
283    // Put the matched brace back so we can use ConsumeBrackets
284    TakeBack();
285  } else {
286    return false;
287  }
288
289  if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
290    return false;
291  }
292
293  start_position.Remove();
294  return true;
295}
296
297bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
298                                          tok::TokenKind right) {
299  Bookmark start_position = SetBookmark();
300  if (!HasMoreTokens() || Peek().getKind() != left)
301    return false;
302  Advance();
303
304  int counter = 1;
305  while (HasMoreTokens() && counter > 0) {
306    tok::TokenKind kind = Peek().getKind();
307    if (kind == right)
308      --counter;
309    else if (kind == left)
310      ++counter;
311    Advance();
312  }
313
314  assert(counter >= 0);
315  if (counter > 0) {
316    return false;
317  }
318  start_position.Remove();
319  return true;
320}
321
322bool CPlusPlusNameParser::ConsumeOperator() {
323  Bookmark start_position = SetBookmark();
324  if (!ConsumeToken(tok::kw_operator))
325    return false;
326
327  if (!HasMoreTokens()) {
328    return false;
329  }
330
331  const auto &token = Peek();
332  switch (token.getKind()) {
333  case tok::kw_new:
334  case tok::kw_delete:
335    // This is 'new' or 'delete' operators.
336    Advance();
337    // Check for array new/delete.
338    if (HasMoreTokens() && Peek().is(tok::l_square)) {
339      // Consume the '[' and ']'.
340      if (!ConsumeBrackets(tok::l_square, tok::r_square))
341        return false;
342    }
343    break;
344
345#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
346  case tok::Token:                                                             \
347    Advance();                                                                 \
348    break;
349#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
350#include "clang/Basic/OperatorKinds.def"
351#undef OVERLOADED_OPERATOR
352#undef OVERLOADED_OPERATOR_MULTI
353
354  case tok::l_paren:
355    // Call operator consume '(' ... ')'.
356    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
357      break;
358    return false;
359
360  case tok::l_square:
361    // This is a [] operator.
362    // Consume the '[' and ']'.
363    if (ConsumeBrackets(tok::l_square, tok::r_square))
364      break;
365    return false;
366
367  default:
368    // This might be a cast operator.
369    if (ConsumeTypename())
370      break;
371    return false;
372  }
373  start_position.Remove();
374  return true;
375}
376
377void CPlusPlusNameParser::SkipTypeQualifiers() {
378  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
379    ;
380}
381
382void CPlusPlusNameParser::SkipFunctionQualifiers() {
383  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
384    ;
385}
386
387bool CPlusPlusNameParser::ConsumeBuiltinType() {
388  bool result = false;
389  bool continue_parsing = true;
390  // Built-in types can be made of a few keywords like 'unsigned long long
391  // int'. This function consumes all built-in type keywords without checking
392  // if they make sense like 'unsigned char void'.
393  while (continue_parsing && HasMoreTokens()) {
394    switch (Peek().getKind()) {
395    case tok::kw_short:
396    case tok::kw_long:
397    case tok::kw___int64:
398    case tok::kw___int128:
399    case tok::kw_signed:
400    case tok::kw_unsigned:
401    case tok::kw_void:
402    case tok::kw_char:
403    case tok::kw_int:
404    case tok::kw_half:
405    case tok::kw_float:
406    case tok::kw_double:
407    case tok::kw___float128:
408    case tok::kw_wchar_t:
409    case tok::kw_bool:
410    case tok::kw_char16_t:
411    case tok::kw_char32_t:
412      result = true;
413      Advance();
414      break;
415    default:
416      continue_parsing = false;
417      break;
418    }
419  }
420  return result;
421}
422
423void CPlusPlusNameParser::SkipPtrsAndRefs() {
424  // Ignoring result.
425  ConsumePtrsAndRefs();
426}
427
428bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
429  bool found = false;
430  SkipTypeQualifiers();
431  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
432                      tok::kw_volatile)) {
433    found = true;
434    SkipTypeQualifiers();
435  }
436  return found;
437}
438
439bool CPlusPlusNameParser::ConsumeDecltype() {
440  Bookmark start_position = SetBookmark();
441  if (!ConsumeToken(tok::kw_decltype))
442    return false;
443
444  if (!ConsumeArguments())
445    return false;
446
447  start_position.Remove();
448  return true;
449}
450
451bool CPlusPlusNameParser::ConsumeTypename() {
452  Bookmark start_position = SetBookmark();
453  SkipTypeQualifiers();
454  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
455    if (!ParseFullNameImpl())
456      return false;
457  }
458  SkipPtrsAndRefs();
459  start_position.Remove();
460  return true;
461}
462
463Optional<CPlusPlusNameParser::ParsedNameRanges>
464CPlusPlusNameParser::ParseFullNameImpl() {
465  // Name parsing state machine.
466  enum class State {
467    Beginning,       // start of the name
468    AfterTwoColons,  // right after ::
469    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
470    AfterTemplate,   // right after template brackets (<something>)
471    AfterOperator,   // right after name of C++ operator
472  };
473
474  Bookmark start_position = SetBookmark();
475  State state = State::Beginning;
476  bool continue_parsing = true;
477  Optional<size_t> last_coloncolon_position = None;
478
479  while (continue_parsing && HasMoreTokens()) {
480    const auto &token = Peek();
481    switch (token.getKind()) {
482    case tok::raw_identifier: // Just a name.
483      if (state != State::Beginning && state != State::AfterTwoColons) {
484        continue_parsing = false;
485        break;
486      }
487      Advance();
488      state = State::AfterIdentifier;
489      break;
490    case tok::l_paren: {
491      if (state == State::Beginning || state == State::AfterTwoColons) {
492        // (anonymous namespace)
493        if (ConsumeAnonymousNamespace()) {
494          state = State::AfterIdentifier;
495          break;
496        }
497      }
498
499      // Type declared inside a function 'func()::Type'
500      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
501          state != State::AfterOperator) {
502        continue_parsing = false;
503        break;
504      }
505      Bookmark l_paren_position = SetBookmark();
506      // Consume the '(' ... ') [const]'.
507      if (!ConsumeArguments()) {
508        continue_parsing = false;
509        break;
510      }
511      SkipFunctionQualifiers();
512
513      // Consume '::'
514      size_t coloncolon_position = GetCurrentPosition();
515      if (!ConsumeToken(tok::coloncolon)) {
516        continue_parsing = false;
517        break;
518      }
519      l_paren_position.Remove();
520      last_coloncolon_position = coloncolon_position;
521      state = State::AfterTwoColons;
522      break;
523    }
524    case tok::l_brace:
525      if (state == State::Beginning || state == State::AfterTwoColons) {
526        if (ConsumeLambda()) {
527          state = State::AfterIdentifier;
528          break;
529        }
530      }
531      continue_parsing = false;
532      break;
533    case tok::coloncolon: // Type nesting delimiter.
534      if (state != State::Beginning && state != State::AfterIdentifier &&
535          state != State::AfterTemplate) {
536        continue_parsing = false;
537        break;
538      }
539      last_coloncolon_position = GetCurrentPosition();
540      Advance();
541      state = State::AfterTwoColons;
542      break;
543    case tok::less: // Template brackets.
544      if (state != State::AfterIdentifier && state != State::AfterOperator) {
545        continue_parsing = false;
546        break;
547      }
548      if (!ConsumeTemplateArgs()) {
549        continue_parsing = false;
550        break;
551      }
552      state = State::AfterTemplate;
553      break;
554    case tok::kw_operator: // C++ operator overloading.
555      if (state != State::Beginning && state != State::AfterTwoColons) {
556        continue_parsing = false;
557        break;
558      }
559      if (!ConsumeOperator()) {
560        continue_parsing = false;
561        break;
562      }
563      state = State::AfterOperator;
564      break;
565    case tok::tilde: // Destructor.
566      if (state != State::Beginning && state != State::AfterTwoColons) {
567        continue_parsing = false;
568        break;
569      }
570      Advance();
571      if (ConsumeToken(tok::raw_identifier)) {
572        state = State::AfterIdentifier;
573      } else {
574        TakeBack();
575        continue_parsing = false;
576      }
577      break;
578    default:
579      continue_parsing = false;
580      break;
581    }
582  }
583
584  if (state == State::AfterIdentifier || state == State::AfterOperator ||
585      state == State::AfterTemplate) {
586    ParsedNameRanges result;
587    if (last_coloncolon_position) {
588      result.context_range = Range(start_position.GetSavedPosition(),
589                                   last_coloncolon_position.getValue());
590      result.basename_range =
591          Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
592    } else {
593      result.basename_range =
594          Range(start_position.GetSavedPosition(), GetCurrentPosition());
595    }
596    start_position.Remove();
597    return result;
598  } else {
599    return None;
600  }
601}
602
603llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
604  if (range.empty())
605    return llvm::StringRef();
606  assert(range.begin_index < range.end_index);
607  assert(range.begin_index < m_tokens.size());
608  assert(range.end_index <= m_tokens.size());
609  clang::Token &first_token = m_tokens[range.begin_index];
610  clang::Token &last_token = m_tokens[range.end_index - 1];
611  clang::SourceLocation start_loc = first_token.getLocation();
612  clang::SourceLocation end_loc = last_token.getLocation();
613  unsigned start_pos = start_loc.getRawEncoding();
614  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
615  return m_text.take_front(end_pos).drop_front(start_pos);
616}
617
618static const clang::LangOptions &GetLangOptions() {
619  static clang::LangOptions g_options;
620  static llvm::once_flag g_once_flag;
621  llvm::call_once(g_once_flag, []() {
622    g_options.LineComment = true;
623    g_options.C99 = true;
624    g_options.C11 = true;
625    g_options.CPlusPlus = true;
626    g_options.CPlusPlus11 = true;
627    g_options.CPlusPlus14 = true;
628    g_options.CPlusPlus17 = true;
629  });
630  return g_options;
631}
632
633static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
634  static llvm::StringMap<tok::TokenKind> g_map{
635#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
636#include "clang/Basic/TokenKinds.def"
637#undef KEYWORD
638  };
639  return g_map;
640}
641
642void CPlusPlusNameParser::ExtractTokens() {
643  if (m_text.empty())
644    return;
645  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
646                     m_text.data(), m_text.data() + m_text.size());
647  const auto &kw_map = GetKeywordsMap();
648  clang::Token token;
649  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
650       lexer.LexFromRawLexer(token)) {
651    if (token.is(clang::tok::raw_identifier)) {
652      auto it = kw_map.find(token.getRawIdentifier());
653      if (it != kw_map.end()) {
654        token.setKind(it->getValue());
655      }
656    }
657
658    m_tokens.push_back(token);
659  }
660}
661