CPlusPlusNameParser.cpp revision 341825
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "CPlusPlusNameParser.h"
11
12#include "clang/Basic/IdentifierTable.h"
13#include "llvm/ADT/StringMap.h"
14#include "llvm/Support/Threading.h"
15
16using namespace lldb;
17using namespace lldb_private;
18using llvm::Optional;
19using llvm::None;
20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22namespace tok = clang::tok;
23
24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25  m_next_token_index = 0;
26  Optional<ParsedFunction> result(None);
27
28  // Try to parse the name as function without a return type specified e.g.
29  // main(int, char*[])
30  {
31    Bookmark start_position = SetBookmark();
32    result = ParseFunctionImpl(false);
33    if (result && !HasMoreTokens())
34      return result;
35  }
36
37  // Try to parse the name as function with function pointer return type e.g.
38  // void (*get_func(const char*))()
39  result = ParseFuncPtr(true);
40  if (result)
41    return result;
42
43  // Finally try to parse the name as a function with non-function return type
44  // e.g. int main(int, char*[])
45  result = ParseFunctionImpl(true);
46  if (HasMoreTokens())
47    return None;
48  return result;
49}
50
51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52  m_next_token_index = 0;
53  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54  if (!name_ranges)
55    return None;
56  if (HasMoreTokens())
57    return None;
58  ParsedName result;
59  result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60  result.context = GetTextForRange(name_ranges.getValue().context_range);
61  return result;
62}
63
64bool CPlusPlusNameParser::HasMoreTokens() {
65  return m_next_token_index < m_tokens.size();
66}
67
68void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69
70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71
72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73  if (!HasMoreTokens())
74    return false;
75
76  if (!Peek().is(kind))
77    return false;
78
79  Advance();
80  return true;
81}
82
83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84  if (!HasMoreTokens())
85    return false;
86
87  if (!Peek().isOneOf(kinds...))
88    return false;
89
90  Advance();
91  return true;
92}
93
94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95  return Bookmark(m_next_token_index);
96}
97
98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99
100clang::Token &CPlusPlusNameParser::Peek() {
101  assert(HasMoreTokens());
102  return m_tokens[m_next_token_index];
103}
104
105Optional<ParsedFunction>
106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107  Bookmark start_position = SetBookmark();
108  if (expect_return_type) {
109    // Consume return type if it's expected.
110    if (!ConsumeTypename())
111      return None;
112  }
113
114  auto maybe_name = ParseFullNameImpl();
115  if (!maybe_name) {
116    return None;
117  }
118
119  size_t argument_start = GetCurrentPosition();
120  if (!ConsumeArguments()) {
121    return None;
122  }
123
124  size_t qualifiers_start = GetCurrentPosition();
125  SkipFunctionQualifiers();
126  size_t end_position = GetCurrentPosition();
127
128  ParsedFunction result;
129  result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130  result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133  start_position.Remove();
134  return result;
135}
136
137Optional<ParsedFunction>
138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139  Bookmark start_position = SetBookmark();
140  if (expect_return_type) {
141    // Consume return type.
142    if (!ConsumeTypename())
143      return None;
144  }
145
146  if (!ConsumeToken(tok::l_paren))
147    return None;
148  if (!ConsumePtrsAndRefs())
149    return None;
150
151  {
152    Bookmark before_inner_function_pos = SetBookmark();
153    auto maybe_inner_function_name = ParseFunctionImpl(false);
154    if (maybe_inner_function_name)
155      if (ConsumeToken(tok::r_paren))
156        if (ConsumeArguments()) {
157          SkipFunctionQualifiers();
158          start_position.Remove();
159          before_inner_function_pos.Remove();
160          return maybe_inner_function_name;
161        }
162  }
163
164  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165  if (maybe_inner_function_ptr_name)
166    if (ConsumeToken(tok::r_paren))
167      if (ConsumeArguments()) {
168        SkipFunctionQualifiers();
169        start_position.Remove();
170        return maybe_inner_function_ptr_name;
171      }
172  return None;
173}
174
175bool CPlusPlusNameParser::ConsumeArguments() {
176  return ConsumeBrackets(tok::l_paren, tok::r_paren);
177}
178
179bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180  Bookmark start_position = SetBookmark();
181  if (!HasMoreTokens() || Peek().getKind() != tok::less)
182    return false;
183  Advance();
184
185  // Consuming template arguments is a bit trickier than consuming function
186  // arguments, because '<' '>' brackets are not always trivially balanced. In
187  // some rare cases tokens '<' and '>' can appear inside template arguments as
188  // arithmetic or shift operators not as template brackets. Examples:
189  // std::enable_if<(10u)<(64), bool>
190  //           f<A<operator<(X,Y)::Subclass>>
191  // Good thing that compiler makes sure that really ambiguous cases of '>'
192  // usage should be enclosed within '()' brackets.
193  int template_counter = 1;
194  bool can_open_template = false;
195  while (HasMoreTokens() && template_counter > 0) {
196    tok::TokenKind kind = Peek().getKind();
197    switch (kind) {
198    case tok::greatergreater:
199      template_counter -= 2;
200      can_open_template = false;
201      Advance();
202      break;
203    case tok::greater:
204      --template_counter;
205      can_open_template = false;
206      Advance();
207      break;
208    case tok::less:
209      // '<' is an attempt to open a subteamplte
210      // check if parser is at the point where it's actually possible,
211      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
212      // need to do the same for '>' because compiler actually makes sure that
213      // '>' always surrounded by brackets to avoid ambiguity.
214      if (can_open_template)
215        ++template_counter;
216      can_open_template = false;
217      Advance();
218      break;
219    case tok::kw_operator: // C++ operator overloading.
220      if (!ConsumeOperator())
221        return false;
222      can_open_template = true;
223      break;
224    case tok::raw_identifier:
225      can_open_template = true;
226      Advance();
227      break;
228    case tok::l_square:
229      if (!ConsumeBrackets(tok::l_square, tok::r_square))
230        return false;
231      can_open_template = false;
232      break;
233    case tok::l_paren:
234      if (!ConsumeArguments())
235        return false;
236      can_open_template = false;
237      break;
238    default:
239      can_open_template = false;
240      Advance();
241      break;
242    }
243  }
244
245  if (template_counter != 0) {
246    return false;
247  }
248  start_position.Remove();
249  return true;
250}
251
252bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
253  Bookmark start_position = SetBookmark();
254  if (!ConsumeToken(tok::l_paren)) {
255    return false;
256  }
257  constexpr llvm::StringLiteral g_anonymous("anonymous");
258  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
259      Peek().getRawIdentifier() == g_anonymous) {
260    Advance();
261  } else {
262    return false;
263  }
264
265  if (!ConsumeToken(tok::kw_namespace)) {
266    return false;
267  }
268
269  if (!ConsumeToken(tok::r_paren)) {
270    return false;
271  }
272  start_position.Remove();
273  return true;
274}
275
276bool CPlusPlusNameParser::ConsumeLambda() {
277  Bookmark start_position = SetBookmark();
278  if (!ConsumeToken(tok::l_brace)) {
279    return false;
280  }
281  constexpr llvm::StringLiteral g_lambda("lambda");
282  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
283      Peek().getRawIdentifier() == g_lambda) {
284    // Put the matched brace back so we can use ConsumeBrackets
285    TakeBack();
286  } else {
287    return false;
288  }
289
290  if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
291    return false;
292  }
293
294  start_position.Remove();
295  return true;
296}
297
298bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
299                                          tok::TokenKind right) {
300  Bookmark start_position = SetBookmark();
301  if (!HasMoreTokens() || Peek().getKind() != left)
302    return false;
303  Advance();
304
305  int counter = 1;
306  while (HasMoreTokens() && counter > 0) {
307    tok::TokenKind kind = Peek().getKind();
308    if (kind == right)
309      --counter;
310    else if (kind == left)
311      ++counter;
312    Advance();
313  }
314
315  assert(counter >= 0);
316  if (counter > 0) {
317    return false;
318  }
319  start_position.Remove();
320  return true;
321}
322
323bool CPlusPlusNameParser::ConsumeOperator() {
324  Bookmark start_position = SetBookmark();
325  if (!ConsumeToken(tok::kw_operator))
326    return false;
327
328  if (!HasMoreTokens()) {
329    return false;
330  }
331
332  const auto &token = Peek();
333  switch (token.getKind()) {
334  case tok::kw_new:
335  case tok::kw_delete:
336    // This is 'new' or 'delete' operators.
337    Advance();
338    // Check for array new/delete.
339    if (HasMoreTokens() && Peek().is(tok::l_square)) {
340      // Consume the '[' and ']'.
341      if (!ConsumeBrackets(tok::l_square, tok::r_square))
342        return false;
343    }
344    break;
345
346#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
347  case tok::Token:                                                             \
348    Advance();                                                                 \
349    break;
350#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
351#include "clang/Basic/OperatorKinds.def"
352#undef OVERLOADED_OPERATOR
353#undef OVERLOADED_OPERATOR_MULTI
354
355  case tok::l_paren:
356    // Call operator consume '(' ... ')'.
357    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
358      break;
359    return false;
360
361  case tok::l_square:
362    // This is a [] operator.
363    // Consume the '[' and ']'.
364    if (ConsumeBrackets(tok::l_square, tok::r_square))
365      break;
366    return false;
367
368  default:
369    // This might be a cast operator.
370    if (ConsumeTypename())
371      break;
372    return false;
373  }
374  start_position.Remove();
375  return true;
376}
377
378void CPlusPlusNameParser::SkipTypeQualifiers() {
379  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
380    ;
381}
382
383void CPlusPlusNameParser::SkipFunctionQualifiers() {
384  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
385    ;
386}
387
388bool CPlusPlusNameParser::ConsumeBuiltinType() {
389  bool result = false;
390  bool continue_parsing = true;
391  // Built-in types can be made of a few keywords like 'unsigned long long
392  // int'. This function consumes all built-in type keywords without checking
393  // if they make sense like 'unsigned char void'.
394  while (continue_parsing && HasMoreTokens()) {
395    switch (Peek().getKind()) {
396    case tok::kw_short:
397    case tok::kw_long:
398    case tok::kw___int64:
399    case tok::kw___int128:
400    case tok::kw_signed:
401    case tok::kw_unsigned:
402    case tok::kw_void:
403    case tok::kw_char:
404    case tok::kw_int:
405    case tok::kw_half:
406    case tok::kw_float:
407    case tok::kw_double:
408    case tok::kw___float128:
409    case tok::kw_wchar_t:
410    case tok::kw_bool:
411    case tok::kw_char16_t:
412    case tok::kw_char32_t:
413      result = true;
414      Advance();
415      break;
416    default:
417      continue_parsing = false;
418      break;
419    }
420  }
421  return result;
422}
423
424void CPlusPlusNameParser::SkipPtrsAndRefs() {
425  // Ignoring result.
426  ConsumePtrsAndRefs();
427}
428
429bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
430  bool found = false;
431  SkipTypeQualifiers();
432  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
433                      tok::kw_volatile)) {
434    found = true;
435    SkipTypeQualifiers();
436  }
437  return found;
438}
439
440bool CPlusPlusNameParser::ConsumeDecltype() {
441  Bookmark start_position = SetBookmark();
442  if (!ConsumeToken(tok::kw_decltype))
443    return false;
444
445  if (!ConsumeArguments())
446    return false;
447
448  start_position.Remove();
449  return true;
450}
451
452bool CPlusPlusNameParser::ConsumeTypename() {
453  Bookmark start_position = SetBookmark();
454  SkipTypeQualifiers();
455  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
456    if (!ParseFullNameImpl())
457      return false;
458  }
459  SkipPtrsAndRefs();
460  start_position.Remove();
461  return true;
462}
463
464Optional<CPlusPlusNameParser::ParsedNameRanges>
465CPlusPlusNameParser::ParseFullNameImpl() {
466  // Name parsing state machine.
467  enum class State {
468    Beginning,       // start of the name
469    AfterTwoColons,  // right after ::
470    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
471    AfterTemplate,   // right after template brackets (<something>)
472    AfterOperator,   // right after name of C++ operator
473  };
474
475  Bookmark start_position = SetBookmark();
476  State state = State::Beginning;
477  bool continue_parsing = true;
478  Optional<size_t> last_coloncolon_position = None;
479
480  while (continue_parsing && HasMoreTokens()) {
481    const auto &token = Peek();
482    switch (token.getKind()) {
483    case tok::raw_identifier: // Just a name.
484      if (state != State::Beginning && state != State::AfterTwoColons) {
485        continue_parsing = false;
486        break;
487      }
488      Advance();
489      state = State::AfterIdentifier;
490      break;
491    case tok::l_paren: {
492      if (state == State::Beginning || state == State::AfterTwoColons) {
493        // (anonymous namespace)
494        if (ConsumeAnonymousNamespace()) {
495          state = State::AfterIdentifier;
496          break;
497        }
498      }
499
500      // Type declared inside a function 'func()::Type'
501      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
502          state != State::AfterOperator) {
503        continue_parsing = false;
504        break;
505      }
506      Bookmark l_paren_position = SetBookmark();
507      // Consume the '(' ... ') [const]'.
508      if (!ConsumeArguments()) {
509        continue_parsing = false;
510        break;
511      }
512      SkipFunctionQualifiers();
513
514      // Consume '::'
515      size_t coloncolon_position = GetCurrentPosition();
516      if (!ConsumeToken(tok::coloncolon)) {
517        continue_parsing = false;
518        break;
519      }
520      l_paren_position.Remove();
521      last_coloncolon_position = coloncolon_position;
522      state = State::AfterTwoColons;
523      break;
524    }
525    case tok::l_brace:
526      if (state == State::Beginning || state == State::AfterTwoColons) {
527        if (ConsumeLambda()) {
528          state = State::AfterIdentifier;
529          break;
530        }
531      }
532      continue_parsing = false;
533      break;
534    case tok::coloncolon: // Type nesting delimiter.
535      if (state != State::Beginning && state != State::AfterIdentifier &&
536          state != State::AfterTemplate) {
537        continue_parsing = false;
538        break;
539      }
540      last_coloncolon_position = GetCurrentPosition();
541      Advance();
542      state = State::AfterTwoColons;
543      break;
544    case tok::less: // Template brackets.
545      if (state != State::AfterIdentifier && state != State::AfterOperator) {
546        continue_parsing = false;
547        break;
548      }
549      if (!ConsumeTemplateArgs()) {
550        continue_parsing = false;
551        break;
552      }
553      state = State::AfterTemplate;
554      break;
555    case tok::kw_operator: // C++ operator overloading.
556      if (state != State::Beginning && state != State::AfterTwoColons) {
557        continue_parsing = false;
558        break;
559      }
560      if (!ConsumeOperator()) {
561        continue_parsing = false;
562        break;
563      }
564      state = State::AfterOperator;
565      break;
566    case tok::tilde: // Destructor.
567      if (state != State::Beginning && state != State::AfterTwoColons) {
568        continue_parsing = false;
569        break;
570      }
571      Advance();
572      if (ConsumeToken(tok::raw_identifier)) {
573        state = State::AfterIdentifier;
574      } else {
575        TakeBack();
576        continue_parsing = false;
577      }
578      break;
579    default:
580      continue_parsing = false;
581      break;
582    }
583  }
584
585  if (state == State::AfterIdentifier || state == State::AfterOperator ||
586      state == State::AfterTemplate) {
587    ParsedNameRanges result;
588    if (last_coloncolon_position) {
589      result.context_range = Range(start_position.GetSavedPosition(),
590                                   last_coloncolon_position.getValue());
591      result.basename_range =
592          Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
593    } else {
594      result.basename_range =
595          Range(start_position.GetSavedPosition(), GetCurrentPosition());
596    }
597    start_position.Remove();
598    return result;
599  } else {
600    return None;
601  }
602}
603
604llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
605  if (range.empty())
606    return llvm::StringRef();
607  assert(range.begin_index < range.end_index);
608  assert(range.begin_index < m_tokens.size());
609  assert(range.end_index <= m_tokens.size());
610  clang::Token &first_token = m_tokens[range.begin_index];
611  clang::Token &last_token = m_tokens[range.end_index - 1];
612  clang::SourceLocation start_loc = first_token.getLocation();
613  clang::SourceLocation end_loc = last_token.getLocation();
614  unsigned start_pos = start_loc.getRawEncoding();
615  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
616  return m_text.take_front(end_pos).drop_front(start_pos);
617}
618
619static const clang::LangOptions &GetLangOptions() {
620  static clang::LangOptions g_options;
621  static llvm::once_flag g_once_flag;
622  llvm::call_once(g_once_flag, []() {
623    g_options.LineComment = true;
624    g_options.C99 = true;
625    g_options.C11 = true;
626    g_options.CPlusPlus = true;
627    g_options.CPlusPlus11 = true;
628    g_options.CPlusPlus14 = true;
629    g_options.CPlusPlus17 = true;
630  });
631  return g_options;
632}
633
634static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
635  static llvm::StringMap<tok::TokenKind> g_map{
636#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
637#include "clang/Basic/TokenKinds.def"
638#undef KEYWORD
639  };
640  return g_map;
641}
642
643void CPlusPlusNameParser::ExtractTokens() {
644  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
645                     m_text.data(), m_text.data() + m_text.size());
646  const auto &kw_map = GetKeywordsMap();
647  clang::Token token;
648  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
649       lexer.LexFromRawLexer(token)) {
650    if (token.is(clang::tok::raw_identifier)) {
651      auto it = kw_map.find(token.getRawIdentifier());
652      if (it != kw_map.end()) {
653        token.setKind(it->getValue());
654      }
655    }
656
657    m_tokens.push_back(token);
658  }
659}
660