CPlusPlusNameParser.cpp revision 321369
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "CPlusPlusNameParser.h"
11
12#include "clang/Basic/IdentifierTable.h"
13#include "llvm/ADT/StringMap.h"
14#include "llvm/Support/Threading.h"
15
16using namespace lldb;
17using namespace lldb_private;
18using llvm::Optional;
19using llvm::None;
20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22namespace tok = clang::tok;
23
24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25  m_next_token_index = 0;
26  Optional<ParsedFunction> result(None);
27
28  // Try to parse the name as function without a return type specified
29  // e.g. main(int, char*[])
30  {
31    Bookmark start_position = SetBookmark();
32    result = ParseFunctionImpl(false);
33    if (result && !HasMoreTokens())
34      return result;
35  }
36
37  // Try to parse the name as function with function pointer return type
38  // e.g. void (*get_func(const char*))()
39  result = ParseFuncPtr(true);
40  if (result)
41    return result;
42
43  // Finally try to parse the name as a function with non-function return type
44  // e.g. int main(int, char*[])
45  result = ParseFunctionImpl(true);
46  if (HasMoreTokens())
47    return None;
48  return result;
49}
50
51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52  m_next_token_index = 0;
53  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54  if (!name_ranges)
55    return None;
56  if (HasMoreTokens())
57    return None;
58  ParsedName result;
59  result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60  result.context = GetTextForRange(name_ranges.getValue().context_range);
61  return result;
62}
63
64bool CPlusPlusNameParser::HasMoreTokens() {
65  return m_next_token_index < m_tokens.size();
66}
67
68void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69
70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71
72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73  if (!HasMoreTokens())
74    return false;
75
76  if (!Peek().is(kind))
77    return false;
78
79  Advance();
80  return true;
81}
82
83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84  if (!HasMoreTokens())
85    return false;
86
87  if (!Peek().isOneOf(kinds...))
88    return false;
89
90  Advance();
91  return true;
92}
93
94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95  return Bookmark(m_next_token_index);
96}
97
98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99
100clang::Token &CPlusPlusNameParser::Peek() {
101  assert(HasMoreTokens());
102  return m_tokens[m_next_token_index];
103}
104
105Optional<ParsedFunction>
106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107  Bookmark start_position = SetBookmark();
108  if (expect_return_type) {
109    // Consume return type if it's expected.
110    if (!ConsumeTypename())
111      return None;
112  }
113
114  auto maybe_name = ParseFullNameImpl();
115  if (!maybe_name) {
116    return None;
117  }
118
119  size_t argument_start = GetCurrentPosition();
120  if (!ConsumeArguments()) {
121    return None;
122  }
123
124  size_t qualifiers_start = GetCurrentPosition();
125  SkipFunctionQualifiers();
126  size_t end_position = GetCurrentPosition();
127
128  ParsedFunction result;
129  result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130  result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133  start_position.Remove();
134  return result;
135}
136
137Optional<ParsedFunction>
138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139  Bookmark start_position = SetBookmark();
140  if (expect_return_type) {
141    // Consume return type.
142    if (!ConsumeTypename())
143      return None;
144  }
145
146  if (!ConsumeToken(tok::l_paren))
147    return None;
148  if (!ConsumePtrsAndRefs())
149    return None;
150
151  {
152    Bookmark before_inner_function_pos = SetBookmark();
153    auto maybe_inner_function_name = ParseFunctionImpl(false);
154    if (maybe_inner_function_name)
155      if (ConsumeToken(tok::r_paren))
156        if (ConsumeArguments()) {
157          SkipFunctionQualifiers();
158          start_position.Remove();
159          before_inner_function_pos.Remove();
160          return maybe_inner_function_name;
161        }
162  }
163
164  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165  if (maybe_inner_function_ptr_name)
166    if (ConsumeToken(tok::r_paren))
167      if (ConsumeArguments()) {
168        SkipFunctionQualifiers();
169        start_position.Remove();
170        return maybe_inner_function_ptr_name;
171      }
172  return None;
173}
174
175bool CPlusPlusNameParser::ConsumeArguments() {
176  return ConsumeBrackets(tok::l_paren, tok::r_paren);
177}
178
179bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180  Bookmark start_position = SetBookmark();
181  if (!HasMoreTokens() || Peek().getKind() != tok::less)
182    return false;
183  Advance();
184
185  // Consuming template arguments is a bit trickier than consuming function
186  // arguments, because '<' '>' brackets are not always trivially balanced.
187  // In some rare cases tokens '<' and '>' can appear inside template arguments
188  // as arithmetic or shift operators not as template brackets.
189  // Examples: std::enable_if<(10u)<(64), bool>
190  //           f<A<operator<(X,Y)::Subclass>>
191  // Good thing that compiler makes sure that really ambiguous cases of
192  // '>' usage should be enclosed within '()' brackets.
193  int template_counter = 1;
194  bool can_open_template = false;
195  while (HasMoreTokens() && template_counter > 0) {
196    tok::TokenKind kind = Peek().getKind();
197    switch (kind) {
198    case tok::greatergreater:
199      template_counter -= 2;
200      can_open_template = false;
201      Advance();
202      break;
203    case tok::greater:
204      --template_counter;
205      can_open_template = false;
206      Advance();
207      break;
208    case tok::less:
209      // '<' is an attempt to open a subteamplte
210      // check if parser is at the point where it's actually possible,
211      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
212      // No need to do the same for '>' because compiler actually makes sure
213      // that '>' always surrounded by brackets to avoid ambiguity.
214      if (can_open_template)
215        ++template_counter;
216      can_open_template = false;
217      Advance();
218      break;
219    case tok::kw_operator: // C++ operator overloading.
220      if (!ConsumeOperator())
221        return false;
222      can_open_template = true;
223      break;
224    case tok::raw_identifier:
225      can_open_template = true;
226      Advance();
227      break;
228    case tok::l_square:
229      if (!ConsumeBrackets(tok::l_square, tok::r_square))
230        return false;
231      can_open_template = false;
232      break;
233    case tok::l_paren:
234      if (!ConsumeArguments())
235        return false;
236      can_open_template = false;
237      break;
238    default:
239      can_open_template = false;
240      Advance();
241      break;
242    }
243  }
244
245  assert(template_counter >= 0);
246  if (template_counter > 0) {
247    return false;
248  }
249  start_position.Remove();
250  return true;
251}
252
253bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
254  Bookmark start_position = SetBookmark();
255  if (!ConsumeToken(tok::l_paren)) {
256    return false;
257  }
258  constexpr llvm::StringLiteral g_anonymous("anonymous");
259  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
260      Peek().getRawIdentifier() == g_anonymous) {
261    Advance();
262  } else {
263    return false;
264  }
265
266  if (!ConsumeToken(tok::kw_namespace)) {
267    return false;
268  }
269
270  if (!ConsumeToken(tok::r_paren)) {
271    return false;
272  }
273  start_position.Remove();
274  return true;
275}
276
277bool CPlusPlusNameParser::ConsumeLambda() {
278  Bookmark start_position = SetBookmark();
279  if (!ConsumeToken(tok::l_brace)) {
280    return false;
281  }
282  constexpr llvm::StringLiteral g_lambda("lambda");
283  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
284      Peek().getRawIdentifier() == g_lambda) {
285    // Put the matched brace back so we can use ConsumeBrackets
286    TakeBack();
287  } else {
288    return false;
289  }
290
291  if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
292    return false;
293  }
294
295  start_position.Remove();
296  return true;
297}
298
299bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
300                                          tok::TokenKind right) {
301  Bookmark start_position = SetBookmark();
302  if (!HasMoreTokens() || Peek().getKind() != left)
303    return false;
304  Advance();
305
306  int counter = 1;
307  while (HasMoreTokens() && counter > 0) {
308    tok::TokenKind kind = Peek().getKind();
309    if (kind == right)
310      --counter;
311    else if (kind == left)
312      ++counter;
313    Advance();
314  }
315
316  assert(counter >= 0);
317  if (counter > 0) {
318    return false;
319  }
320  start_position.Remove();
321  return true;
322}
323
324bool CPlusPlusNameParser::ConsumeOperator() {
325  Bookmark start_position = SetBookmark();
326  if (!ConsumeToken(tok::kw_operator))
327    return false;
328
329  if (!HasMoreTokens()) {
330    return false;
331  }
332
333  const auto &token = Peek();
334  switch (token.getKind()) {
335  case tok::kw_new:
336  case tok::kw_delete:
337    // This is 'new' or 'delete' operators.
338    Advance();
339    // Check for array new/delete.
340    if (HasMoreTokens() && Peek().is(tok::l_square)) {
341      // Consume the '[' and ']'.
342      if (!ConsumeBrackets(tok::l_square, tok::r_square))
343        return false;
344    }
345    break;
346
347#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
348  case tok::Token:                                                             \
349    Advance();                                                                 \
350    break;
351#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
352#include "clang/Basic/OperatorKinds.def"
353#undef OVERLOADED_OPERATOR
354#undef OVERLOADED_OPERATOR_MULTI
355
356  case tok::l_paren:
357    // Call operator consume '(' ... ')'.
358    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
359      break;
360    return false;
361
362  case tok::l_square:
363    // This is a [] operator.
364    // Consume the '[' and ']'.
365    if (ConsumeBrackets(tok::l_square, tok::r_square))
366      break;
367    return false;
368
369  default:
370    // This might be a cast operator.
371    if (ConsumeTypename())
372      break;
373    return false;
374  }
375  start_position.Remove();
376  return true;
377}
378
379void CPlusPlusNameParser::SkipTypeQualifiers() {
380  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
381    ;
382}
383
384void CPlusPlusNameParser::SkipFunctionQualifiers() {
385  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
386    ;
387}
388
389bool CPlusPlusNameParser::ConsumeBuiltinType() {
390  bool result = false;
391  bool continue_parsing = true;
392  // Built-in types can be made of a few keywords
393  // like 'unsigned long long int'. This function
394  // consumes all built-in type keywords without
395  // checking if they make sense like 'unsigned char void'.
396  while (continue_parsing && HasMoreTokens()) {
397    switch (Peek().getKind()) {
398    case tok::kw_short:
399    case tok::kw_long:
400    case tok::kw___int64:
401    case tok::kw___int128:
402    case tok::kw_signed:
403    case tok::kw_unsigned:
404    case tok::kw_void:
405    case tok::kw_char:
406    case tok::kw_int:
407    case tok::kw_half:
408    case tok::kw_float:
409    case tok::kw_double:
410    case tok::kw___float128:
411    case tok::kw_wchar_t:
412    case tok::kw_bool:
413    case tok::kw_char16_t:
414    case tok::kw_char32_t:
415      result = true;
416      Advance();
417      break;
418    default:
419      continue_parsing = false;
420      break;
421    }
422  }
423  return result;
424}
425
426void CPlusPlusNameParser::SkipPtrsAndRefs() {
427  // Ignoring result.
428  ConsumePtrsAndRefs();
429}
430
431bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
432  bool found = false;
433  SkipTypeQualifiers();
434  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
435                      tok::kw_volatile)) {
436    found = true;
437    SkipTypeQualifiers();
438  }
439  return found;
440}
441
442bool CPlusPlusNameParser::ConsumeDecltype() {
443  Bookmark start_position = SetBookmark();
444  if (!ConsumeToken(tok::kw_decltype))
445    return false;
446
447  if (!ConsumeArguments())
448    return false;
449
450  start_position.Remove();
451  return true;
452}
453
454bool CPlusPlusNameParser::ConsumeTypename() {
455  Bookmark start_position = SetBookmark();
456  SkipTypeQualifiers();
457  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
458    if (!ParseFullNameImpl())
459      return false;
460  }
461  SkipPtrsAndRefs();
462  start_position.Remove();
463  return true;
464}
465
466Optional<CPlusPlusNameParser::ParsedNameRanges>
467CPlusPlusNameParser::ParseFullNameImpl() {
468  // Name parsing state machine.
469  enum class State {
470    Beginning,       // start of the name
471    AfterTwoColons,  // right after ::
472    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
473    AfterTemplate,   // right after template brackets (<something>)
474    AfterOperator,   // right after name of C++ operator
475  };
476
477  Bookmark start_position = SetBookmark();
478  State state = State::Beginning;
479  bool continue_parsing = true;
480  Optional<size_t> last_coloncolon_position = None;
481
482  while (continue_parsing && HasMoreTokens()) {
483    const auto &token = Peek();
484    switch (token.getKind()) {
485    case tok::raw_identifier: // Just a name.
486      if (state != State::Beginning && state != State::AfterTwoColons) {
487        continue_parsing = false;
488        break;
489      }
490      Advance();
491      state = State::AfterIdentifier;
492      break;
493    case tok::l_paren: {
494      if (state == State::Beginning || state == State::AfterTwoColons) {
495        // (anonymous namespace)
496        if (ConsumeAnonymousNamespace()) {
497          state = State::AfterIdentifier;
498          break;
499        }
500      }
501
502      // Type declared inside a function 'func()::Type'
503      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
504          state != State::AfterOperator) {
505        continue_parsing = false;
506        break;
507      }
508      Bookmark l_paren_position = SetBookmark();
509      // Consume the '(' ... ') [const]'.
510      if (!ConsumeArguments()) {
511        continue_parsing = false;
512        break;
513      }
514      SkipFunctionQualifiers();
515
516      // Consume '::'
517      size_t coloncolon_position = GetCurrentPosition();
518      if (!ConsumeToken(tok::coloncolon)) {
519        continue_parsing = false;
520        break;
521      }
522      l_paren_position.Remove();
523      last_coloncolon_position = coloncolon_position;
524      state = State::AfterTwoColons;
525      break;
526    }
527    case tok::l_brace:
528      if (state == State::Beginning || state == State::AfterTwoColons) {
529        if (ConsumeLambda()) {
530          state = State::AfterIdentifier;
531          break;
532        }
533      }
534      continue_parsing = false;
535      break;
536    case tok::coloncolon: // Type nesting delimiter.
537      if (state != State::Beginning && state != State::AfterIdentifier &&
538          state != State::AfterTemplate) {
539        continue_parsing = false;
540        break;
541      }
542      last_coloncolon_position = GetCurrentPosition();
543      Advance();
544      state = State::AfterTwoColons;
545      break;
546    case tok::less: // Template brackets.
547      if (state != State::AfterIdentifier && state != State::AfterOperator) {
548        continue_parsing = false;
549        break;
550      }
551      if (!ConsumeTemplateArgs()) {
552        continue_parsing = false;
553        break;
554      }
555      state = State::AfterTemplate;
556      break;
557    case tok::kw_operator: // C++ operator overloading.
558      if (state != State::Beginning && state != State::AfterTwoColons) {
559        continue_parsing = false;
560        break;
561      }
562      if (!ConsumeOperator()) {
563        continue_parsing = false;
564        break;
565      }
566      state = State::AfterOperator;
567      break;
568    case tok::tilde: // Destructor.
569      if (state != State::Beginning && state != State::AfterTwoColons) {
570        continue_parsing = false;
571        break;
572      }
573      Advance();
574      if (ConsumeToken(tok::raw_identifier)) {
575        state = State::AfterIdentifier;
576      } else {
577        TakeBack();
578        continue_parsing = false;
579      }
580      break;
581    default:
582      continue_parsing = false;
583      break;
584    }
585  }
586
587  if (state == State::AfterIdentifier || state == State::AfterOperator ||
588      state == State::AfterTemplate) {
589    ParsedNameRanges result;
590    if (last_coloncolon_position) {
591      result.context_range = Range(start_position.GetSavedPosition(),
592                                   last_coloncolon_position.getValue());
593      result.basename_range =
594          Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
595    } else {
596      result.basename_range =
597          Range(start_position.GetSavedPosition(), GetCurrentPosition());
598    }
599    start_position.Remove();
600    return result;
601  } else {
602    return None;
603  }
604}
605
606llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
607  if (range.empty())
608    return llvm::StringRef();
609  assert(range.begin_index < range.end_index);
610  assert(range.begin_index < m_tokens.size());
611  assert(range.end_index <= m_tokens.size());
612  clang::Token &first_token = m_tokens[range.begin_index];
613  clang::Token &last_token = m_tokens[range.end_index - 1];
614  clang::SourceLocation start_loc = first_token.getLocation();
615  clang::SourceLocation end_loc = last_token.getLocation();
616  unsigned start_pos = start_loc.getRawEncoding();
617  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
618  return m_text.take_front(end_pos).drop_front(start_pos);
619}
620
621static const clang::LangOptions &GetLangOptions() {
622  static clang::LangOptions g_options;
623  static llvm::once_flag g_once_flag;
624  llvm::call_once(g_once_flag, []() {
625    g_options.LineComment = true;
626    g_options.C99 = true;
627    g_options.C11 = true;
628    g_options.CPlusPlus = true;
629    g_options.CPlusPlus11 = true;
630    g_options.CPlusPlus14 = true;
631    g_options.CPlusPlus1z = true;
632  });
633  return g_options;
634}
635
636static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
637  static llvm::StringMap<tok::TokenKind> g_map{
638#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
639#include "clang/Basic/TokenKinds.def"
640#undef KEYWORD
641  };
642  return g_map;
643}
644
645void CPlusPlusNameParser::ExtractTokens() {
646  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
647                     m_text.data(), m_text.data() + m_text.size());
648  const auto &kw_map = GetKeywordsMap();
649  clang::Token token;
650  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
651       lexer.LexFromRawLexer(token)) {
652    if (token.is(clang::tok::raw_identifier)) {
653      auto it = kw_map.find(token.getRawIdentifier());
654      if (it != kw_map.end()) {
655        token.setKind(it->getValue());
656      }
657    }
658
659    m_tokens.push_back(token);
660  }
661}
662