CPlusPlusNameParser.cpp revision 341825
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "CPlusPlusNameParser.h" 11 12#include "clang/Basic/IdentifierTable.h" 13#include "llvm/ADT/StringMap.h" 14#include "llvm/Support/Threading.h" 15 16using namespace lldb; 17using namespace lldb_private; 18using llvm::Optional; 19using llvm::None; 20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 22namespace tok = clang::tok; 23 24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 25 m_next_token_index = 0; 26 Optional<ParsedFunction> result(None); 27 28 // Try to parse the name as function without a return type specified e.g. 29 // main(int, char*[]) 30 { 31 Bookmark start_position = SetBookmark(); 32 result = ParseFunctionImpl(false); 33 if (result && !HasMoreTokens()) 34 return result; 35 } 36 37 // Try to parse the name as function with function pointer return type e.g. 38 // void (*get_func(const char*))() 39 result = ParseFuncPtr(true); 40 if (result) 41 return result; 42 43 // Finally try to parse the name as a function with non-function return type 44 // e.g. int main(int, char*[]) 45 result = ParseFunctionImpl(true); 46 if (HasMoreTokens()) 47 return None; 48 return result; 49} 50 51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 52 m_next_token_index = 0; 53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 54 if (!name_ranges) 55 return None; 56 if (HasMoreTokens()) 57 return None; 58 ParsedName result; 59 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 60 result.context = GetTextForRange(name_ranges.getValue().context_range); 61 return result; 62} 63 64bool CPlusPlusNameParser::HasMoreTokens() { 65 return m_next_token_index < m_tokens.size(); 66} 67 68void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 69 70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 71 72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 73 if (!HasMoreTokens()) 74 return false; 75 76 if (!Peek().is(kind)) 77 return false; 78 79 Advance(); 80 return true; 81} 82 83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 84 if (!HasMoreTokens()) 85 return false; 86 87 if (!Peek().isOneOf(kinds...)) 88 return false; 89 90 Advance(); 91 return true; 92} 93 94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 95 return Bookmark(m_next_token_index); 96} 97 98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 99 100clang::Token &CPlusPlusNameParser::Peek() { 101 assert(HasMoreTokens()); 102 return m_tokens[m_next_token_index]; 103} 104 105Optional<ParsedFunction> 106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 107 Bookmark start_position = SetBookmark(); 108 if (expect_return_type) { 109 // Consume return type if it's expected. 110 if (!ConsumeTypename()) 111 return None; 112 } 113 114 auto maybe_name = ParseFullNameImpl(); 115 if (!maybe_name) { 116 return None; 117 } 118 119 size_t argument_start = GetCurrentPosition(); 120 if (!ConsumeArguments()) { 121 return None; 122 } 123 124 size_t qualifiers_start = GetCurrentPosition(); 125 SkipFunctionQualifiers(); 126 size_t end_position = GetCurrentPosition(); 127 128 ParsedFunction result; 129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 130 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 133 start_position.Remove(); 134 return result; 135} 136 137Optional<ParsedFunction> 138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 139 Bookmark start_position = SetBookmark(); 140 if (expect_return_type) { 141 // Consume return type. 142 if (!ConsumeTypename()) 143 return None; 144 } 145 146 if (!ConsumeToken(tok::l_paren)) 147 return None; 148 if (!ConsumePtrsAndRefs()) 149 return None; 150 151 { 152 Bookmark before_inner_function_pos = SetBookmark(); 153 auto maybe_inner_function_name = ParseFunctionImpl(false); 154 if (maybe_inner_function_name) 155 if (ConsumeToken(tok::r_paren)) 156 if (ConsumeArguments()) { 157 SkipFunctionQualifiers(); 158 start_position.Remove(); 159 before_inner_function_pos.Remove(); 160 return maybe_inner_function_name; 161 } 162 } 163 164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 165 if (maybe_inner_function_ptr_name) 166 if (ConsumeToken(tok::r_paren)) 167 if (ConsumeArguments()) { 168 SkipFunctionQualifiers(); 169 start_position.Remove(); 170 return maybe_inner_function_ptr_name; 171 } 172 return None; 173} 174 175bool CPlusPlusNameParser::ConsumeArguments() { 176 return ConsumeBrackets(tok::l_paren, tok::r_paren); 177} 178 179bool CPlusPlusNameParser::ConsumeTemplateArgs() { 180 Bookmark start_position = SetBookmark(); 181 if (!HasMoreTokens() || Peek().getKind() != tok::less) 182 return false; 183 Advance(); 184 185 // Consuming template arguments is a bit trickier than consuming function 186 // arguments, because '<' '>' brackets are not always trivially balanced. In 187 // some rare cases tokens '<' and '>' can appear inside template arguments as 188 // arithmetic or shift operators not as template brackets. Examples: 189 // std::enable_if<(10u)<(64), bool> 190 // f<A<operator<(X,Y)::Subclass>> 191 // Good thing that compiler makes sure that really ambiguous cases of '>' 192 // usage should be enclosed within '()' brackets. 193 int template_counter = 1; 194 bool can_open_template = false; 195 while (HasMoreTokens() && template_counter > 0) { 196 tok::TokenKind kind = Peek().getKind(); 197 switch (kind) { 198 case tok::greatergreater: 199 template_counter -= 2; 200 can_open_template = false; 201 Advance(); 202 break; 203 case tok::greater: 204 --template_counter; 205 can_open_template = false; 206 Advance(); 207 break; 208 case tok::less: 209 // '<' is an attempt to open a subteamplte 210 // check if parser is at the point where it's actually possible, 211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No 212 // need to do the same for '>' because compiler actually makes sure that 213 // '>' always surrounded by brackets to avoid ambiguity. 214 if (can_open_template) 215 ++template_counter; 216 can_open_template = false; 217 Advance(); 218 break; 219 case tok::kw_operator: // C++ operator overloading. 220 if (!ConsumeOperator()) 221 return false; 222 can_open_template = true; 223 break; 224 case tok::raw_identifier: 225 can_open_template = true; 226 Advance(); 227 break; 228 case tok::l_square: 229 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 230 return false; 231 can_open_template = false; 232 break; 233 case tok::l_paren: 234 if (!ConsumeArguments()) 235 return false; 236 can_open_template = false; 237 break; 238 default: 239 can_open_template = false; 240 Advance(); 241 break; 242 } 243 } 244 245 if (template_counter != 0) { 246 return false; 247 } 248 start_position.Remove(); 249 return true; 250} 251 252bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 253 Bookmark start_position = SetBookmark(); 254 if (!ConsumeToken(tok::l_paren)) { 255 return false; 256 } 257 constexpr llvm::StringLiteral g_anonymous("anonymous"); 258 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 259 Peek().getRawIdentifier() == g_anonymous) { 260 Advance(); 261 } else { 262 return false; 263 } 264 265 if (!ConsumeToken(tok::kw_namespace)) { 266 return false; 267 } 268 269 if (!ConsumeToken(tok::r_paren)) { 270 return false; 271 } 272 start_position.Remove(); 273 return true; 274} 275 276bool CPlusPlusNameParser::ConsumeLambda() { 277 Bookmark start_position = SetBookmark(); 278 if (!ConsumeToken(tok::l_brace)) { 279 return false; 280 } 281 constexpr llvm::StringLiteral g_lambda("lambda"); 282 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 283 Peek().getRawIdentifier() == g_lambda) { 284 // Put the matched brace back so we can use ConsumeBrackets 285 TakeBack(); 286 } else { 287 return false; 288 } 289 290 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { 291 return false; 292 } 293 294 start_position.Remove(); 295 return true; 296} 297 298bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 299 tok::TokenKind right) { 300 Bookmark start_position = SetBookmark(); 301 if (!HasMoreTokens() || Peek().getKind() != left) 302 return false; 303 Advance(); 304 305 int counter = 1; 306 while (HasMoreTokens() && counter > 0) { 307 tok::TokenKind kind = Peek().getKind(); 308 if (kind == right) 309 --counter; 310 else if (kind == left) 311 ++counter; 312 Advance(); 313 } 314 315 assert(counter >= 0); 316 if (counter > 0) { 317 return false; 318 } 319 start_position.Remove(); 320 return true; 321} 322 323bool CPlusPlusNameParser::ConsumeOperator() { 324 Bookmark start_position = SetBookmark(); 325 if (!ConsumeToken(tok::kw_operator)) 326 return false; 327 328 if (!HasMoreTokens()) { 329 return false; 330 } 331 332 const auto &token = Peek(); 333 switch (token.getKind()) { 334 case tok::kw_new: 335 case tok::kw_delete: 336 // This is 'new' or 'delete' operators. 337 Advance(); 338 // Check for array new/delete. 339 if (HasMoreTokens() && Peek().is(tok::l_square)) { 340 // Consume the '[' and ']'. 341 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 342 return false; 343 } 344 break; 345 346#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 347 case tok::Token: \ 348 Advance(); \ 349 break; 350#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 351#include "clang/Basic/OperatorKinds.def" 352#undef OVERLOADED_OPERATOR 353#undef OVERLOADED_OPERATOR_MULTI 354 355 case tok::l_paren: 356 // Call operator consume '(' ... ')'. 357 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 358 break; 359 return false; 360 361 case tok::l_square: 362 // This is a [] operator. 363 // Consume the '[' and ']'. 364 if (ConsumeBrackets(tok::l_square, tok::r_square)) 365 break; 366 return false; 367 368 default: 369 // This might be a cast operator. 370 if (ConsumeTypename()) 371 break; 372 return false; 373 } 374 start_position.Remove(); 375 return true; 376} 377 378void CPlusPlusNameParser::SkipTypeQualifiers() { 379 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 380 ; 381} 382 383void CPlusPlusNameParser::SkipFunctionQualifiers() { 384 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 385 ; 386} 387 388bool CPlusPlusNameParser::ConsumeBuiltinType() { 389 bool result = false; 390 bool continue_parsing = true; 391 // Built-in types can be made of a few keywords like 'unsigned long long 392 // int'. This function consumes all built-in type keywords without checking 393 // if they make sense like 'unsigned char void'. 394 while (continue_parsing && HasMoreTokens()) { 395 switch (Peek().getKind()) { 396 case tok::kw_short: 397 case tok::kw_long: 398 case tok::kw___int64: 399 case tok::kw___int128: 400 case tok::kw_signed: 401 case tok::kw_unsigned: 402 case tok::kw_void: 403 case tok::kw_char: 404 case tok::kw_int: 405 case tok::kw_half: 406 case tok::kw_float: 407 case tok::kw_double: 408 case tok::kw___float128: 409 case tok::kw_wchar_t: 410 case tok::kw_bool: 411 case tok::kw_char16_t: 412 case tok::kw_char32_t: 413 result = true; 414 Advance(); 415 break; 416 default: 417 continue_parsing = false; 418 break; 419 } 420 } 421 return result; 422} 423 424void CPlusPlusNameParser::SkipPtrsAndRefs() { 425 // Ignoring result. 426 ConsumePtrsAndRefs(); 427} 428 429bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 430 bool found = false; 431 SkipTypeQualifiers(); 432 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 433 tok::kw_volatile)) { 434 found = true; 435 SkipTypeQualifiers(); 436 } 437 return found; 438} 439 440bool CPlusPlusNameParser::ConsumeDecltype() { 441 Bookmark start_position = SetBookmark(); 442 if (!ConsumeToken(tok::kw_decltype)) 443 return false; 444 445 if (!ConsumeArguments()) 446 return false; 447 448 start_position.Remove(); 449 return true; 450} 451 452bool CPlusPlusNameParser::ConsumeTypename() { 453 Bookmark start_position = SetBookmark(); 454 SkipTypeQualifiers(); 455 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 456 if (!ParseFullNameImpl()) 457 return false; 458 } 459 SkipPtrsAndRefs(); 460 start_position.Remove(); 461 return true; 462} 463 464Optional<CPlusPlusNameParser::ParsedNameRanges> 465CPlusPlusNameParser::ParseFullNameImpl() { 466 // Name parsing state machine. 467 enum class State { 468 Beginning, // start of the name 469 AfterTwoColons, // right after :: 470 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 471 AfterTemplate, // right after template brackets (<something>) 472 AfterOperator, // right after name of C++ operator 473 }; 474 475 Bookmark start_position = SetBookmark(); 476 State state = State::Beginning; 477 bool continue_parsing = true; 478 Optional<size_t> last_coloncolon_position = None; 479 480 while (continue_parsing && HasMoreTokens()) { 481 const auto &token = Peek(); 482 switch (token.getKind()) { 483 case tok::raw_identifier: // Just a name. 484 if (state != State::Beginning && state != State::AfterTwoColons) { 485 continue_parsing = false; 486 break; 487 } 488 Advance(); 489 state = State::AfterIdentifier; 490 break; 491 case tok::l_paren: { 492 if (state == State::Beginning || state == State::AfterTwoColons) { 493 // (anonymous namespace) 494 if (ConsumeAnonymousNamespace()) { 495 state = State::AfterIdentifier; 496 break; 497 } 498 } 499 500 // Type declared inside a function 'func()::Type' 501 if (state != State::AfterIdentifier && state != State::AfterTemplate && 502 state != State::AfterOperator) { 503 continue_parsing = false; 504 break; 505 } 506 Bookmark l_paren_position = SetBookmark(); 507 // Consume the '(' ... ') [const]'. 508 if (!ConsumeArguments()) { 509 continue_parsing = false; 510 break; 511 } 512 SkipFunctionQualifiers(); 513 514 // Consume '::' 515 size_t coloncolon_position = GetCurrentPosition(); 516 if (!ConsumeToken(tok::coloncolon)) { 517 continue_parsing = false; 518 break; 519 } 520 l_paren_position.Remove(); 521 last_coloncolon_position = coloncolon_position; 522 state = State::AfterTwoColons; 523 break; 524 } 525 case tok::l_brace: 526 if (state == State::Beginning || state == State::AfterTwoColons) { 527 if (ConsumeLambda()) { 528 state = State::AfterIdentifier; 529 break; 530 } 531 } 532 continue_parsing = false; 533 break; 534 case tok::coloncolon: // Type nesting delimiter. 535 if (state != State::Beginning && state != State::AfterIdentifier && 536 state != State::AfterTemplate) { 537 continue_parsing = false; 538 break; 539 } 540 last_coloncolon_position = GetCurrentPosition(); 541 Advance(); 542 state = State::AfterTwoColons; 543 break; 544 case tok::less: // Template brackets. 545 if (state != State::AfterIdentifier && state != State::AfterOperator) { 546 continue_parsing = false; 547 break; 548 } 549 if (!ConsumeTemplateArgs()) { 550 continue_parsing = false; 551 break; 552 } 553 state = State::AfterTemplate; 554 break; 555 case tok::kw_operator: // C++ operator overloading. 556 if (state != State::Beginning && state != State::AfterTwoColons) { 557 continue_parsing = false; 558 break; 559 } 560 if (!ConsumeOperator()) { 561 continue_parsing = false; 562 break; 563 } 564 state = State::AfterOperator; 565 break; 566 case tok::tilde: // Destructor. 567 if (state != State::Beginning && state != State::AfterTwoColons) { 568 continue_parsing = false; 569 break; 570 } 571 Advance(); 572 if (ConsumeToken(tok::raw_identifier)) { 573 state = State::AfterIdentifier; 574 } else { 575 TakeBack(); 576 continue_parsing = false; 577 } 578 break; 579 default: 580 continue_parsing = false; 581 break; 582 } 583 } 584 585 if (state == State::AfterIdentifier || state == State::AfterOperator || 586 state == State::AfterTemplate) { 587 ParsedNameRanges result; 588 if (last_coloncolon_position) { 589 result.context_range = Range(start_position.GetSavedPosition(), 590 last_coloncolon_position.getValue()); 591 result.basename_range = 592 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 593 } else { 594 result.basename_range = 595 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 596 } 597 start_position.Remove(); 598 return result; 599 } else { 600 return None; 601 } 602} 603 604llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 605 if (range.empty()) 606 return llvm::StringRef(); 607 assert(range.begin_index < range.end_index); 608 assert(range.begin_index < m_tokens.size()); 609 assert(range.end_index <= m_tokens.size()); 610 clang::Token &first_token = m_tokens[range.begin_index]; 611 clang::Token &last_token = m_tokens[range.end_index - 1]; 612 clang::SourceLocation start_loc = first_token.getLocation(); 613 clang::SourceLocation end_loc = last_token.getLocation(); 614 unsigned start_pos = start_loc.getRawEncoding(); 615 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 616 return m_text.take_front(end_pos).drop_front(start_pos); 617} 618 619static const clang::LangOptions &GetLangOptions() { 620 static clang::LangOptions g_options; 621 static llvm::once_flag g_once_flag; 622 llvm::call_once(g_once_flag, []() { 623 g_options.LineComment = true; 624 g_options.C99 = true; 625 g_options.C11 = true; 626 g_options.CPlusPlus = true; 627 g_options.CPlusPlus11 = true; 628 g_options.CPlusPlus14 = true; 629 g_options.CPlusPlus17 = true; 630 }); 631 return g_options; 632} 633 634static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 635 static llvm::StringMap<tok::TokenKind> g_map{ 636#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 637#include "clang/Basic/TokenKinds.def" 638#undef KEYWORD 639 }; 640 return g_map; 641} 642 643void CPlusPlusNameParser::ExtractTokens() { 644 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 645 m_text.data(), m_text.data() + m_text.size()); 646 const auto &kw_map = GetKeywordsMap(); 647 clang::Token token; 648 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 649 lexer.LexFromRawLexer(token)) { 650 if (token.is(clang::tok::raw_identifier)) { 651 auto it = kw_map.find(token.getRawIdentifier()); 652 if (it != kw_map.end()) { 653 token.setKind(it->getValue()); 654 } 655 } 656 657 m_tokens.push_back(token); 658 } 659} 660