// CPlusPlusNameParser.cpp revision 355940
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "CPlusPlusNameParser.h" 10 11#include "clang/Basic/IdentifierTable.h" 12#include "llvm/ADT/StringMap.h" 13#include "llvm/Support/Threading.h" 14 15using namespace lldb; 16using namespace lldb_private; 17using llvm::Optional; 18using llvm::None; 19using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 20using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 21namespace tok = clang::tok; 22 23Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 24 m_next_token_index = 0; 25 Optional<ParsedFunction> result(None); 26 27 // Try to parse the name as function without a return type specified e.g. 28 // main(int, char*[]) 29 { 30 Bookmark start_position = SetBookmark(); 31 result = ParseFunctionImpl(false); 32 if (result && !HasMoreTokens()) 33 return result; 34 } 35 36 // Try to parse the name as function with function pointer return type e.g. 37 // void (*get_func(const char*))() 38 result = ParseFuncPtr(true); 39 if (result) 40 return result; 41 42 // Finally try to parse the name as a function with non-function return type 43 // e.g. 
int main(int, char*[]) 44 result = ParseFunctionImpl(true); 45 if (HasMoreTokens()) 46 return None; 47 return result; 48} 49 50Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 51 m_next_token_index = 0; 52 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 53 if (!name_ranges) 54 return None; 55 if (HasMoreTokens()) 56 return None; 57 ParsedName result; 58 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 59 result.context = GetTextForRange(name_ranges.getValue().context_range); 60 return result; 61} 62 63bool CPlusPlusNameParser::HasMoreTokens() { 64 return m_next_token_index < m_tokens.size(); 65} 66 67void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 68 69void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 70 71bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 72 if (!HasMoreTokens()) 73 return false; 74 75 if (!Peek().is(kind)) 76 return false; 77 78 Advance(); 79 return true; 80} 81 82template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 83 if (!HasMoreTokens()) 84 return false; 85 86 if (!Peek().isOneOf(kinds...)) 87 return false; 88 89 Advance(); 90 return true; 91} 92 93CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 94 return Bookmark(m_next_token_index); 95} 96 97size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 98 99clang::Token &CPlusPlusNameParser::Peek() { 100 assert(HasMoreTokens()); 101 return m_tokens[m_next_token_index]; 102} 103 104Optional<ParsedFunction> 105CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 106 Bookmark start_position = SetBookmark(); 107 if (expect_return_type) { 108 // Consume return type if it's expected. 
109 if (!ConsumeTypename()) 110 return None; 111 } 112 113 auto maybe_name = ParseFullNameImpl(); 114 if (!maybe_name) { 115 return None; 116 } 117 118 size_t argument_start = GetCurrentPosition(); 119 if (!ConsumeArguments()) { 120 return None; 121 } 122 123 size_t qualifiers_start = GetCurrentPosition(); 124 SkipFunctionQualifiers(); 125 size_t end_position = GetCurrentPosition(); 126 127 ParsedFunction result; 128 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 129 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 130 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 131 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 132 start_position.Remove(); 133 return result; 134} 135 136Optional<ParsedFunction> 137CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 138 Bookmark start_position = SetBookmark(); 139 if (expect_return_type) { 140 // Consume return type. 141 if (!ConsumeTypename()) 142 return None; 143 } 144 145 if (!ConsumeToken(tok::l_paren)) 146 return None; 147 if (!ConsumePtrsAndRefs()) 148 return None; 149 150 { 151 Bookmark before_inner_function_pos = SetBookmark(); 152 auto maybe_inner_function_name = ParseFunctionImpl(false); 153 if (maybe_inner_function_name) 154 if (ConsumeToken(tok::r_paren)) 155 if (ConsumeArguments()) { 156 SkipFunctionQualifiers(); 157 start_position.Remove(); 158 before_inner_function_pos.Remove(); 159 return maybe_inner_function_name; 160 } 161 } 162 163 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 164 if (maybe_inner_function_ptr_name) 165 if (ConsumeToken(tok::r_paren)) 166 if (ConsumeArguments()) { 167 SkipFunctionQualifiers(); 168 start_position.Remove(); 169 return maybe_inner_function_ptr_name; 170 } 171 return None; 172} 173 174bool CPlusPlusNameParser::ConsumeArguments() { 175 return ConsumeBrackets(tok::l_paren, tok::r_paren); 176} 177 178bool CPlusPlusNameParser::ConsumeTemplateArgs() { 
179 Bookmark start_position = SetBookmark(); 180 if (!HasMoreTokens() || Peek().getKind() != tok::less) 181 return false; 182 Advance(); 183 184 // Consuming template arguments is a bit trickier than consuming function 185 // arguments, because '<' '>' brackets are not always trivially balanced. In 186 // some rare cases tokens '<' and '>' can appear inside template arguments as 187 // arithmetic or shift operators not as template brackets. Examples: 188 // std::enable_if<(10u)<(64), bool> 189 // f<A<operator<(X,Y)::Subclass>> 190 // Good thing that compiler makes sure that really ambiguous cases of '>' 191 // usage should be enclosed within '()' brackets. 192 int template_counter = 1; 193 bool can_open_template = false; 194 while (HasMoreTokens() && template_counter > 0) { 195 tok::TokenKind kind = Peek().getKind(); 196 switch (kind) { 197 case tok::greatergreater: 198 template_counter -= 2; 199 can_open_template = false; 200 Advance(); 201 break; 202 case tok::greater: 203 --template_counter; 204 can_open_template = false; 205 Advance(); 206 break; 207 case tok::less: 208 // '<' is an attempt to open a subteamplte 209 // check if parser is at the point where it's actually possible, 210 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No 211 // need to do the same for '>' because compiler actually makes sure that 212 // '>' always surrounded by brackets to avoid ambiguity. 213 if (can_open_template) 214 ++template_counter; 215 can_open_template = false; 216 Advance(); 217 break; 218 case tok::kw_operator: // C++ operator overloading. 
219 if (!ConsumeOperator()) 220 return false; 221 can_open_template = true; 222 break; 223 case tok::raw_identifier: 224 can_open_template = true; 225 Advance(); 226 break; 227 case tok::l_square: 228 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 229 return false; 230 can_open_template = false; 231 break; 232 case tok::l_paren: 233 if (!ConsumeArguments()) 234 return false; 235 can_open_template = false; 236 break; 237 default: 238 can_open_template = false; 239 Advance(); 240 break; 241 } 242 } 243 244 if (template_counter != 0) { 245 return false; 246 } 247 start_position.Remove(); 248 return true; 249} 250 251bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 252 Bookmark start_position = SetBookmark(); 253 if (!ConsumeToken(tok::l_paren)) { 254 return false; 255 } 256 constexpr llvm::StringLiteral g_anonymous("anonymous"); 257 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 258 Peek().getRawIdentifier() == g_anonymous) { 259 Advance(); 260 } else { 261 return false; 262 } 263 264 if (!ConsumeToken(tok::kw_namespace)) { 265 return false; 266 } 267 268 if (!ConsumeToken(tok::r_paren)) { 269 return false; 270 } 271 start_position.Remove(); 272 return true; 273} 274 275bool CPlusPlusNameParser::ConsumeLambda() { 276 Bookmark start_position = SetBookmark(); 277 if (!ConsumeToken(tok::l_brace)) { 278 return false; 279 } 280 constexpr llvm::StringLiteral g_lambda("lambda"); 281 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 282 Peek().getRawIdentifier() == g_lambda) { 283 // Put the matched brace back so we can use ConsumeBrackets 284 TakeBack(); 285 } else { 286 return false; 287 } 288 289 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { 290 return false; 291 } 292 293 start_position.Remove(); 294 return true; 295} 296 297bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 298 tok::TokenKind right) { 299 Bookmark start_position = SetBookmark(); 300 if (!HasMoreTokens() || Peek().getKind() != left) 301 return false; 302 
Advance(); 303 304 int counter = 1; 305 while (HasMoreTokens() && counter > 0) { 306 tok::TokenKind kind = Peek().getKind(); 307 if (kind == right) 308 --counter; 309 else if (kind == left) 310 ++counter; 311 Advance(); 312 } 313 314 assert(counter >= 0); 315 if (counter > 0) { 316 return false; 317 } 318 start_position.Remove(); 319 return true; 320} 321 322bool CPlusPlusNameParser::ConsumeOperator() { 323 Bookmark start_position = SetBookmark(); 324 if (!ConsumeToken(tok::kw_operator)) 325 return false; 326 327 if (!HasMoreTokens()) { 328 return false; 329 } 330 331 const auto &token = Peek(); 332 switch (token.getKind()) { 333 case tok::kw_new: 334 case tok::kw_delete: 335 // This is 'new' or 'delete' operators. 336 Advance(); 337 // Check for array new/delete. 338 if (HasMoreTokens() && Peek().is(tok::l_square)) { 339 // Consume the '[' and ']'. 340 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 341 return false; 342 } 343 break; 344 345#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 346 case tok::Token: \ 347 Advance(); \ 348 break; 349#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 350#include "clang/Basic/OperatorKinds.def" 351#undef OVERLOADED_OPERATOR 352#undef OVERLOADED_OPERATOR_MULTI 353 354 case tok::l_paren: 355 // Call operator consume '(' ... ')'. 356 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 357 break; 358 return false; 359 360 case tok::l_square: 361 // This is a [] operator. 362 // Consume the '[' and ']'. 363 if (ConsumeBrackets(tok::l_square, tok::r_square)) 364 break; 365 return false; 366 367 default: 368 // This might be a cast operator. 
369 if (ConsumeTypename()) 370 break; 371 return false; 372 } 373 start_position.Remove(); 374 return true; 375} 376 377void CPlusPlusNameParser::SkipTypeQualifiers() { 378 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 379 ; 380} 381 382void CPlusPlusNameParser::SkipFunctionQualifiers() { 383 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 384 ; 385} 386 387bool CPlusPlusNameParser::ConsumeBuiltinType() { 388 bool result = false; 389 bool continue_parsing = true; 390 // Built-in types can be made of a few keywords like 'unsigned long long 391 // int'. This function consumes all built-in type keywords without checking 392 // if they make sense like 'unsigned char void'. 393 while (continue_parsing && HasMoreTokens()) { 394 switch (Peek().getKind()) { 395 case tok::kw_short: 396 case tok::kw_long: 397 case tok::kw___int64: 398 case tok::kw___int128: 399 case tok::kw_signed: 400 case tok::kw_unsigned: 401 case tok::kw_void: 402 case tok::kw_char: 403 case tok::kw_int: 404 case tok::kw_half: 405 case tok::kw_float: 406 case tok::kw_double: 407 case tok::kw___float128: 408 case tok::kw_wchar_t: 409 case tok::kw_bool: 410 case tok::kw_char16_t: 411 case tok::kw_char32_t: 412 result = true; 413 Advance(); 414 break; 415 default: 416 continue_parsing = false; 417 break; 418 } 419 } 420 return result; 421} 422 423void CPlusPlusNameParser::SkipPtrsAndRefs() { 424 // Ignoring result. 
425 ConsumePtrsAndRefs(); 426} 427 428bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 429 bool found = false; 430 SkipTypeQualifiers(); 431 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 432 tok::kw_volatile)) { 433 found = true; 434 SkipTypeQualifiers(); 435 } 436 return found; 437} 438 439bool CPlusPlusNameParser::ConsumeDecltype() { 440 Bookmark start_position = SetBookmark(); 441 if (!ConsumeToken(tok::kw_decltype)) 442 return false; 443 444 if (!ConsumeArguments()) 445 return false; 446 447 start_position.Remove(); 448 return true; 449} 450 451bool CPlusPlusNameParser::ConsumeTypename() { 452 Bookmark start_position = SetBookmark(); 453 SkipTypeQualifiers(); 454 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 455 if (!ParseFullNameImpl()) 456 return false; 457 } 458 SkipPtrsAndRefs(); 459 start_position.Remove(); 460 return true; 461} 462 463Optional<CPlusPlusNameParser::ParsedNameRanges> 464CPlusPlusNameParser::ParseFullNameImpl() { 465 // Name parsing state machine. 466 enum class State { 467 Beginning, // start of the name 468 AfterTwoColons, // right after :: 469 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 470 AfterTemplate, // right after template brackets (<something>) 471 AfterOperator, // right after name of C++ operator 472 }; 473 474 Bookmark start_position = SetBookmark(); 475 State state = State::Beginning; 476 bool continue_parsing = true; 477 Optional<size_t> last_coloncolon_position = None; 478 479 while (continue_parsing && HasMoreTokens()) { 480 const auto &token = Peek(); 481 switch (token.getKind()) { 482 case tok::raw_identifier: // Just a name. 
483 if (state != State::Beginning && state != State::AfterTwoColons) { 484 continue_parsing = false; 485 break; 486 } 487 Advance(); 488 state = State::AfterIdentifier; 489 break; 490 case tok::l_paren: { 491 if (state == State::Beginning || state == State::AfterTwoColons) { 492 // (anonymous namespace) 493 if (ConsumeAnonymousNamespace()) { 494 state = State::AfterIdentifier; 495 break; 496 } 497 } 498 499 // Type declared inside a function 'func()::Type' 500 if (state != State::AfterIdentifier && state != State::AfterTemplate && 501 state != State::AfterOperator) { 502 continue_parsing = false; 503 break; 504 } 505 Bookmark l_paren_position = SetBookmark(); 506 // Consume the '(' ... ') [const]'. 507 if (!ConsumeArguments()) { 508 continue_parsing = false; 509 break; 510 } 511 SkipFunctionQualifiers(); 512 513 // Consume '::' 514 size_t coloncolon_position = GetCurrentPosition(); 515 if (!ConsumeToken(tok::coloncolon)) { 516 continue_parsing = false; 517 break; 518 } 519 l_paren_position.Remove(); 520 last_coloncolon_position = coloncolon_position; 521 state = State::AfterTwoColons; 522 break; 523 } 524 case tok::l_brace: 525 if (state == State::Beginning || state == State::AfterTwoColons) { 526 if (ConsumeLambda()) { 527 state = State::AfterIdentifier; 528 break; 529 } 530 } 531 continue_parsing = false; 532 break; 533 case tok::coloncolon: // Type nesting delimiter. 534 if (state != State::Beginning && state != State::AfterIdentifier && 535 state != State::AfterTemplate) { 536 continue_parsing = false; 537 break; 538 } 539 last_coloncolon_position = GetCurrentPosition(); 540 Advance(); 541 state = State::AfterTwoColons; 542 break; 543 case tok::less: // Template brackets. 
544 if (state != State::AfterIdentifier && state != State::AfterOperator) { 545 continue_parsing = false; 546 break; 547 } 548 if (!ConsumeTemplateArgs()) { 549 continue_parsing = false; 550 break; 551 } 552 state = State::AfterTemplate; 553 break; 554 case tok::kw_operator: // C++ operator overloading. 555 if (state != State::Beginning && state != State::AfterTwoColons) { 556 continue_parsing = false; 557 break; 558 } 559 if (!ConsumeOperator()) { 560 continue_parsing = false; 561 break; 562 } 563 state = State::AfterOperator; 564 break; 565 case tok::tilde: // Destructor. 566 if (state != State::Beginning && state != State::AfterTwoColons) { 567 continue_parsing = false; 568 break; 569 } 570 Advance(); 571 if (ConsumeToken(tok::raw_identifier)) { 572 state = State::AfterIdentifier; 573 } else { 574 TakeBack(); 575 continue_parsing = false; 576 } 577 break; 578 default: 579 continue_parsing = false; 580 break; 581 } 582 } 583 584 if (state == State::AfterIdentifier || state == State::AfterOperator || 585 state == State::AfterTemplate) { 586 ParsedNameRanges result; 587 if (last_coloncolon_position) { 588 result.context_range = Range(start_position.GetSavedPosition(), 589 last_coloncolon_position.getValue()); 590 result.basename_range = 591 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 592 } else { 593 result.basename_range = 594 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 595 } 596 start_position.Remove(); 597 return result; 598 } else { 599 return None; 600 } 601} 602 603llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 604 if (range.empty()) 605 return llvm::StringRef(); 606 assert(range.begin_index < range.end_index); 607 assert(range.begin_index < m_tokens.size()); 608 assert(range.end_index <= m_tokens.size()); 609 clang::Token &first_token = m_tokens[range.begin_index]; 610 clang::Token &last_token = m_tokens[range.end_index - 1]; 611 clang::SourceLocation start_loc = 
first_token.getLocation(); 612 clang::SourceLocation end_loc = last_token.getLocation(); 613 unsigned start_pos = start_loc.getRawEncoding(); 614 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 615 return m_text.take_front(end_pos).drop_front(start_pos); 616} 617 618static const clang::LangOptions &GetLangOptions() { 619 static clang::LangOptions g_options; 620 static llvm::once_flag g_once_flag; 621 llvm::call_once(g_once_flag, []() { 622 g_options.LineComment = true; 623 g_options.C99 = true; 624 g_options.C11 = true; 625 g_options.CPlusPlus = true; 626 g_options.CPlusPlus11 = true; 627 g_options.CPlusPlus14 = true; 628 g_options.CPlusPlus17 = true; 629 }); 630 return g_options; 631} 632 633static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 634 static llvm::StringMap<tok::TokenKind> g_map{ 635#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 636#include "clang/Basic/TokenKinds.def" 637#undef KEYWORD 638 }; 639 return g_map; 640} 641 642void CPlusPlusNameParser::ExtractTokens() { 643 if (m_text.empty()) 644 return; 645 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 646 m_text.data(), m_text.data() + m_text.size()); 647 const auto &kw_map = GetKeywordsMap(); 648 clang::Token token; 649 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 650 lexer.LexFromRawLexer(token)) { 651 if (token.is(clang::tok::raw_identifier)) { 652 auto it = kw_map.find(token.getRawIdentifier()); 653 if (it != kw_map.end()) { 654 token.setKind(it->getValue()); 655 } 656 } 657 658 m_tokens.push_back(token); 659 } 660} 661