CPlusPlusNameParser.cpp revision 317032
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "CPlusPlusNameParser.h"
11
12#include "clang/Basic/IdentifierTable.h"
13#include "llvm/ADT/StringMap.h"
14#include "llvm/Support/Threading.h"
15
16using namespace lldb;
17using namespace lldb_private;
18using llvm::Optional;
19using llvm::None;
20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22namespace tok = clang::tok;
23
24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25  m_next_token_index = 0;
26  Optional<ParsedFunction> result(None);
27
28  // Try to parse the name as function without a return type specified
29  // e.g. main(int, char*[])
30  {
31    Bookmark start_position = SetBookmark();
32    result = ParseFunctionImpl(false);
33    if (result && !HasMoreTokens())
34      return result;
35  }
36
37  // Try to parse the name as function with function pointer return type
38  // e.g. void (*get_func(const char*))()
39  result = ParseFuncPtr(true);
40  if (result)
41    return result;
42
43  // Finally try to parse the name as a function with non-function return type
44  // e.g. int main(int, char*[])
45  result = ParseFunctionImpl(true);
46  if (HasMoreTokens())
47    return None;
48  return result;
49}
50
51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52  m_next_token_index = 0;
53  Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54  if (!name_ranges)
55    return None;
56  if (HasMoreTokens())
57    return None;
58  ParsedName result;
59  result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60  result.context = GetTextForRange(name_ranges.getValue().context_range);
61  return result;
62}
63
64bool CPlusPlusNameParser::HasMoreTokens() {
65  return m_next_token_index < m_tokens.size();
66}
67
68void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69
70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71
72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73  if (!HasMoreTokens())
74    return false;
75
76  if (!Peek().is(kind))
77    return false;
78
79  Advance();
80  return true;
81}
82
83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84  if (!HasMoreTokens())
85    return false;
86
87  if (!Peek().isOneOf(kinds...))
88    return false;
89
90  Advance();
91  return true;
92}
93
94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95  return Bookmark(m_next_token_index);
96}
97
98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99
100clang::Token &CPlusPlusNameParser::Peek() {
101  assert(HasMoreTokens());
102  return m_tokens[m_next_token_index];
103}
104
105Optional<ParsedFunction>
106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107  Bookmark start_position = SetBookmark();
108  if (expect_return_type) {
109    // Consume return type if it's expected.
110    if (!ConsumeTypename())
111      return None;
112  }
113
114  auto maybe_name = ParseFullNameImpl();
115  if (!maybe_name) {
116    return None;
117  }
118
119  size_t argument_start = GetCurrentPosition();
120  if (!ConsumeArguments()) {
121    return None;
122  }
123
124  size_t qualifiers_start = GetCurrentPosition();
125  SkipFunctionQualifiers();
126  size_t end_position = GetCurrentPosition();
127
128  ParsedFunction result;
129  result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130  result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133  start_position.Remove();
134  return result;
135}
136
137Optional<ParsedFunction>
138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139  Bookmark start_position = SetBookmark();
140  if (expect_return_type) {
141    // Consume return type.
142    if (!ConsumeTypename())
143      return None;
144  }
145
146  if (!ConsumeToken(tok::l_paren))
147    return None;
148  if (!ConsumePtrsAndRefs())
149    return None;
150
151  {
152    Bookmark before_inner_function_pos = SetBookmark();
153    auto maybe_inner_function_name = ParseFunctionImpl(false);
154    if (maybe_inner_function_name)
155      if (ConsumeToken(tok::r_paren))
156        if (ConsumeArguments()) {
157          SkipFunctionQualifiers();
158          start_position.Remove();
159          before_inner_function_pos.Remove();
160          return maybe_inner_function_name;
161        }
162  }
163
164  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165  if (maybe_inner_function_ptr_name)
166    if (ConsumeToken(tok::r_paren))
167      if (ConsumeArguments()) {
168        SkipFunctionQualifiers();
169        start_position.Remove();
170        return maybe_inner_function_ptr_name;
171      }
172  return None;
173}
174
175bool CPlusPlusNameParser::ConsumeArguments() {
176  return ConsumeBrackets(tok::l_paren, tok::r_paren);
177}
178
179bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180  Bookmark start_position = SetBookmark();
181  if (!HasMoreTokens() || Peek().getKind() != tok::less)
182    return false;
183  Advance();
184
185  // Consuming template arguments is a bit trickier than consuming function
186  // arguments, because '<' '>' brackets are not always trivially balanced.
187  // In some rare cases tokens '<' and '>' can appear inside template arguments
188  // as arithmetic or shift operators not as template brackets.
189  // Examples: std::enable_if<(10u)<(64), bool>
190  //           f<A<operator<(X,Y)::Subclass>>
191  // Good thing that compiler makes sure that really ambiguous cases of
192  // '>' usage should be enclosed within '()' brackets.
193  int template_counter = 1;
194  bool can_open_template = false;
195  while (HasMoreTokens() && template_counter > 0) {
196    tok::TokenKind kind = Peek().getKind();
197    switch (kind) {
198    case tok::greatergreater:
199      template_counter -= 2;
200      can_open_template = false;
201      Advance();
202      break;
203    case tok::greater:
204      --template_counter;
205      can_open_template = false;
206      Advance();
207      break;
208    case tok::less:
209      // '<' is an attempt to open a subteamplte
210      // check if parser is at the point where it's actually possible,
211      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
212      // No need to do the same for '>' because compiler actually makes sure
213      // that '>' always surrounded by brackets to avoid ambiguity.
214      if (can_open_template)
215        ++template_counter;
216      can_open_template = false;
217      Advance();
218      break;
219    case tok::kw_operator: // C++ operator overloading.
220      if (!ConsumeOperator())
221        return false;
222      can_open_template = true;
223      break;
224    case tok::raw_identifier:
225      can_open_template = true;
226      Advance();
227      break;
228    case tok::l_square:
229      if (!ConsumeBrackets(tok::l_square, tok::r_square))
230        return false;
231      can_open_template = false;
232      break;
233    case tok::l_paren:
234      if (!ConsumeArguments())
235        return false;
236      can_open_template = false;
237      break;
238    default:
239      can_open_template = false;
240      Advance();
241      break;
242    }
243  }
244
245  assert(template_counter >= 0);
246  if (template_counter > 0) {
247    return false;
248  }
249  start_position.Remove();
250  return true;
251}
252
253bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
254  Bookmark start_position = SetBookmark();
255  if (!ConsumeToken(tok::l_paren)) {
256    return false;
257  }
258  constexpr llvm::StringLiteral g_anonymous("anonymous");
259  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
260      Peek().getRawIdentifier() == g_anonymous) {
261    Advance();
262  } else {
263    return false;
264  }
265
266  if (!ConsumeToken(tok::kw_namespace)) {
267    return false;
268  }
269
270  if (!ConsumeToken(tok::r_paren)) {
271    return false;
272  }
273  start_position.Remove();
274  return true;
275}
276
277bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
278                                          tok::TokenKind right) {
279  Bookmark start_position = SetBookmark();
280  if (!HasMoreTokens() || Peek().getKind() != left)
281    return false;
282  Advance();
283
284  int counter = 1;
285  while (HasMoreTokens() && counter > 0) {
286    tok::TokenKind kind = Peek().getKind();
287    if (kind == right)
288      --counter;
289    else if (kind == left)
290      ++counter;
291    Advance();
292  }
293
294  assert(counter >= 0);
295  if (counter > 0) {
296    return false;
297  }
298  start_position.Remove();
299  return true;
300}
301
302bool CPlusPlusNameParser::ConsumeOperator() {
303  Bookmark start_position = SetBookmark();
304  if (!ConsumeToken(tok::kw_operator))
305    return false;
306
307  if (!HasMoreTokens()) {
308    return false;
309  }
310
311  const auto &token = Peek();
312  switch (token.getKind()) {
313  case tok::kw_new:
314  case tok::kw_delete:
315    // This is 'new' or 'delete' operators.
316    Advance();
317    // Check for array new/delete.
318    if (HasMoreTokens() && Peek().is(tok::l_square)) {
319      // Consume the '[' and ']'.
320      if (!ConsumeBrackets(tok::l_square, tok::r_square))
321        return false;
322    }
323    break;
324
325#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
326  case tok::Token:                                                             \
327    Advance();                                                                 \
328    break;
329#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
330#include "clang/Basic/OperatorKinds.def"
331#undef OVERLOADED_OPERATOR
332#undef OVERLOADED_OPERATOR_MULTI
333
334  case tok::l_paren:
335    // Call operator consume '(' ... ')'.
336    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
337      break;
338    return false;
339
340  case tok::l_square:
341    // This is a [] operator.
342    // Consume the '[' and ']'.
343    if (ConsumeBrackets(tok::l_square, tok::r_square))
344      break;
345    return false;
346
347  default:
348    // This might be a cast operator.
349    if (ConsumeTypename())
350      break;
351    return false;
352  }
353  start_position.Remove();
354  return true;
355}
356
357void CPlusPlusNameParser::SkipTypeQualifiers() {
358  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
359    ;
360}
361
362void CPlusPlusNameParser::SkipFunctionQualifiers() {
363  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
364    ;
365}
366
367bool CPlusPlusNameParser::ConsumeBuiltinType() {
368  bool result = false;
369  bool continue_parsing = true;
370  // Built-in types can be made of a few keywords
371  // like 'unsigned long long int'. This function
372  // consumes all built-in type keywords without
373  // checking if they make sense like 'unsigned char void'.
374  while (continue_parsing && HasMoreTokens()) {
375    switch (Peek().getKind()) {
376    case tok::kw_short:
377    case tok::kw_long:
378    case tok::kw___int64:
379    case tok::kw___int128:
380    case tok::kw_signed:
381    case tok::kw_unsigned:
382    case tok::kw_void:
383    case tok::kw_char:
384    case tok::kw_int:
385    case tok::kw_half:
386    case tok::kw_float:
387    case tok::kw_double:
388    case tok::kw___float128:
389    case tok::kw_wchar_t:
390    case tok::kw_bool:
391    case tok::kw_char16_t:
392    case tok::kw_char32_t:
393      result = true;
394      Advance();
395      break;
396    default:
397      continue_parsing = false;
398      break;
399    }
400  }
401  return result;
402}
403
404void CPlusPlusNameParser::SkipPtrsAndRefs() {
405  // Ignoring result.
406  ConsumePtrsAndRefs();
407}
408
409bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
410  bool found = false;
411  SkipTypeQualifiers();
412  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
413                      tok::kw_volatile)) {
414    found = true;
415    SkipTypeQualifiers();
416  }
417  return found;
418}
419
420bool CPlusPlusNameParser::ConsumeDecltype() {
421  Bookmark start_position = SetBookmark();
422  if (!ConsumeToken(tok::kw_decltype))
423    return false;
424
425  if (!ConsumeArguments())
426    return false;
427
428  start_position.Remove();
429  return true;
430}
431
432bool CPlusPlusNameParser::ConsumeTypename() {
433  Bookmark start_position = SetBookmark();
434  SkipTypeQualifiers();
435  if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
436    if (!ParseFullNameImpl())
437      return false;
438  }
439  SkipPtrsAndRefs();
440  start_position.Remove();
441  return true;
442}
443
444Optional<CPlusPlusNameParser::ParsedNameRanges>
445CPlusPlusNameParser::ParseFullNameImpl() {
446  // Name parsing state machine.
447  enum class State {
448    Beginning,       // start of the name
449    AfterTwoColons,  // right after ::
450    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
451    AfterTemplate,   // right after template brackets (<something>)
452    AfterOperator,   // right after name of C++ operator
453  };
454
455  Bookmark start_position = SetBookmark();
456  State state = State::Beginning;
457  bool continue_parsing = true;
458  Optional<size_t> last_coloncolon_position = None;
459
460  while (continue_parsing && HasMoreTokens()) {
461    const auto &token = Peek();
462    switch (token.getKind()) {
463    case tok::raw_identifier: // Just a name.
464      if (state != State::Beginning && state != State::AfterTwoColons) {
465        continue_parsing = false;
466        break;
467      }
468      Advance();
469      state = State::AfterIdentifier;
470      break;
471    case tok::l_paren: {
472      if (state == State::Beginning || state == State::AfterTwoColons) {
473        // (anonymous namespace)
474        if (ConsumeAnonymousNamespace()) {
475          state = State::AfterIdentifier;
476          break;
477        }
478      }
479
480      // Type declared inside a function 'func()::Type'
481      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
482          state != State::AfterOperator) {
483        continue_parsing = false;
484        break;
485      }
486      Bookmark l_paren_position = SetBookmark();
487      // Consume the '(' ... ') [const]'.
488      if (!ConsumeArguments()) {
489        continue_parsing = false;
490        break;
491      }
492      SkipFunctionQualifiers();
493
494      // Consume '::'
495      size_t coloncolon_position = GetCurrentPosition();
496      if (!ConsumeToken(tok::coloncolon)) {
497        continue_parsing = false;
498        break;
499      }
500      l_paren_position.Remove();
501      last_coloncolon_position = coloncolon_position;
502      state = State::AfterTwoColons;
503      break;
504    }
505    case tok::coloncolon: // Type nesting delimiter.
506      if (state != State::Beginning && state != State::AfterIdentifier &&
507          state != State::AfterTemplate) {
508        continue_parsing = false;
509        break;
510      }
511      last_coloncolon_position = GetCurrentPosition();
512      Advance();
513      state = State::AfterTwoColons;
514      break;
515    case tok::less: // Template brackets.
516      if (state != State::AfterIdentifier && state != State::AfterOperator) {
517        continue_parsing = false;
518        break;
519      }
520      if (!ConsumeTemplateArgs()) {
521        continue_parsing = false;
522        break;
523      }
524      state = State::AfterTemplate;
525      break;
526    case tok::kw_operator: // C++ operator overloading.
527      if (state != State::Beginning && state != State::AfterTwoColons) {
528        continue_parsing = false;
529        break;
530      }
531      if (!ConsumeOperator()) {
532        continue_parsing = false;
533        break;
534      }
535      state = State::AfterOperator;
536      break;
537    case tok::tilde: // Destructor.
538      if (state != State::Beginning && state != State::AfterTwoColons) {
539        continue_parsing = false;
540        break;
541      }
542      Advance();
543      if (ConsumeToken(tok::raw_identifier)) {
544        state = State::AfterIdentifier;
545      } else {
546        TakeBack();
547        continue_parsing = false;
548      }
549      break;
550    default:
551      continue_parsing = false;
552      break;
553    }
554  }
555
556  if (state == State::AfterIdentifier || state == State::AfterOperator ||
557      state == State::AfterTemplate) {
558    ParsedNameRanges result;
559    if (last_coloncolon_position) {
560      result.context_range = Range(start_position.GetSavedPosition(),
561                                   last_coloncolon_position.getValue());
562      result.basename_range =
563          Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
564    } else {
565      result.basename_range =
566          Range(start_position.GetSavedPosition(), GetCurrentPosition());
567    }
568    start_position.Remove();
569    return result;
570  } else {
571    return None;
572  }
573}
574
575llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
576  if (range.empty())
577    return llvm::StringRef();
578  assert(range.begin_index < range.end_index);
579  assert(range.begin_index < m_tokens.size());
580  assert(range.end_index <= m_tokens.size());
581  clang::Token &first_token = m_tokens[range.begin_index];
582  clang::Token &last_token = m_tokens[range.end_index - 1];
583  clang::SourceLocation start_loc = first_token.getLocation();
584  clang::SourceLocation end_loc = last_token.getLocation();
585  unsigned start_pos = start_loc.getRawEncoding();
586  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
587  return m_text.take_front(end_pos).drop_front(start_pos);
588}
589
590static const clang::LangOptions &GetLangOptions() {
591  static clang::LangOptions g_options;
592  static llvm::once_flag g_once_flag;
593  llvm::call_once(g_once_flag, []() {
594    g_options.LineComment = true;
595    g_options.C99 = true;
596    g_options.C11 = true;
597    g_options.CPlusPlus = true;
598    g_options.CPlusPlus11 = true;
599    g_options.CPlusPlus14 = true;
600    g_options.CPlusPlus1z = true;
601  });
602  return g_options;
603}
604
605static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
606  static llvm::StringMap<tok::TokenKind> g_map{
607#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
608#include "clang/Basic/TokenKinds.def"
609#undef KEYWORD
610  };
611  return g_map;
612}
613
614void CPlusPlusNameParser::ExtractTokens() {
615  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
616                     m_text.data(), m_text.data() + m_text.size());
617  const auto &kw_map = GetKeywordsMap();
618  clang::Token token;
619  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
620       lexer.LexFromRawLexer(token)) {
621    if (token.is(clang::tok::raw_identifier)) {
622      auto it = kw_map.find(token.getRawIdentifier());
623      if (it != kw_map.end()) {
624        token.setKind(it->getValue());
625      }
626    }
627
628    m_tokens.push_back(token);
629  }
630}
631