CPlusPlusNameParser.cpp revision 327952
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "CPlusPlusNameParser.h" 11 12#include "clang/Basic/IdentifierTable.h" 13#include "llvm/ADT/StringMap.h" 14#include "llvm/Support/Threading.h" 15 16using namespace lldb; 17using namespace lldb_private; 18using llvm::Optional; 19using llvm::None; 20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 22namespace tok = clang::tok; 23 24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 25 m_next_token_index = 0; 26 Optional<ParsedFunction> result(None); 27 28 // Try to parse the name as function without a return type specified 29 // e.g. main(int, char*[]) 30 { 31 Bookmark start_position = SetBookmark(); 32 result = ParseFunctionImpl(false); 33 if (result && !HasMoreTokens()) 34 return result; 35 } 36 37 // Try to parse the name as function with function pointer return type 38 // e.g. void (*get_func(const char*))() 39 result = ParseFuncPtr(true); 40 if (result) 41 return result; 42 43 // Finally try to parse the name as a function with non-function return type 44 // e.g. int main(int, char*[]) 45 result = ParseFunctionImpl(true); 46 if (HasMoreTokens()) 47 return None; 48 return result; 49} 50 51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 52 m_next_token_index = 0; 53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 54 if (!name_ranges) 55 return None; 56 if (HasMoreTokens()) 57 return None; 58 ParsedName result; 59 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 60 result.context = GetTextForRange(name_ranges.getValue().context_range); 61 return result; 62} 63 64bool CPlusPlusNameParser::HasMoreTokens() { 65 return m_next_token_index < m_tokens.size(); 66} 67 68void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 69 70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 71 72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 73 if (!HasMoreTokens()) 74 return false; 75 76 if (!Peek().is(kind)) 77 return false; 78 79 Advance(); 80 return true; 81} 82 83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 84 if (!HasMoreTokens()) 85 return false; 86 87 if (!Peek().isOneOf(kinds...)) 88 return false; 89 90 Advance(); 91 return true; 92} 93 94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 95 return Bookmark(m_next_token_index); 96} 97 98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 99 100clang::Token &CPlusPlusNameParser::Peek() { 101 assert(HasMoreTokens()); 102 return m_tokens[m_next_token_index]; 103} 104 105Optional<ParsedFunction> 106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 107 Bookmark start_position = SetBookmark(); 108 if (expect_return_type) { 109 // Consume return type if it's expected. 110 if (!ConsumeTypename()) 111 return None; 112 } 113 114 auto maybe_name = ParseFullNameImpl(); 115 if (!maybe_name) { 116 return None; 117 } 118 119 size_t argument_start = GetCurrentPosition(); 120 if (!ConsumeArguments()) { 121 return None; 122 } 123 124 size_t qualifiers_start = GetCurrentPosition(); 125 SkipFunctionQualifiers(); 126 size_t end_position = GetCurrentPosition(); 127 128 ParsedFunction result; 129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 130 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 133 start_position.Remove(); 134 return result; 135} 136 137Optional<ParsedFunction> 138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 139 Bookmark start_position = SetBookmark(); 140 if (expect_return_type) { 141 // Consume return type. 142 if (!ConsumeTypename()) 143 return None; 144 } 145 146 if (!ConsumeToken(tok::l_paren)) 147 return None; 148 if (!ConsumePtrsAndRefs()) 149 return None; 150 151 { 152 Bookmark before_inner_function_pos = SetBookmark(); 153 auto maybe_inner_function_name = ParseFunctionImpl(false); 154 if (maybe_inner_function_name) 155 if (ConsumeToken(tok::r_paren)) 156 if (ConsumeArguments()) { 157 SkipFunctionQualifiers(); 158 start_position.Remove(); 159 before_inner_function_pos.Remove(); 160 return maybe_inner_function_name; 161 } 162 } 163 164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 165 if (maybe_inner_function_ptr_name) 166 if (ConsumeToken(tok::r_paren)) 167 if (ConsumeArguments()) { 168 SkipFunctionQualifiers(); 169 start_position.Remove(); 170 return maybe_inner_function_ptr_name; 171 } 172 return None; 173} 174 175bool CPlusPlusNameParser::ConsumeArguments() { 176 return ConsumeBrackets(tok::l_paren, tok::r_paren); 177} 178 179bool CPlusPlusNameParser::ConsumeTemplateArgs() { 180 Bookmark start_position = SetBookmark(); 181 if (!HasMoreTokens() || Peek().getKind() != tok::less) 182 return false; 183 Advance(); 184 185 // Consuming template arguments is a bit trickier than consuming function 186 // arguments, because '<' '>' brackets are not always trivially balanced. 187 // In some rare cases tokens '<' and '>' can appear inside template arguments 188 // as arithmetic or shift operators not as template brackets. 189 // Examples: std::enable_if<(10u)<(64), bool> 190 // f<A<operator<(X,Y)::Subclass>> 191 // Good thing that compiler makes sure that really ambiguous cases of 192 // '>' usage should be enclosed within '()' brackets. 193 int template_counter = 1; 194 bool can_open_template = false; 195 while (HasMoreTokens() && template_counter > 0) { 196 tok::TokenKind kind = Peek().getKind(); 197 switch (kind) { 198 case tok::greatergreater: 199 template_counter -= 2; 200 can_open_template = false; 201 Advance(); 202 break; 203 case tok::greater: 204 --template_counter; 205 can_open_template = false; 206 Advance(); 207 break; 208 case tok::less: 209 // '<' is an attempt to open a subteamplte 210 // check if parser is at the point where it's actually possible, 211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. 212 // No need to do the same for '>' because compiler actually makes sure 213 // that '>' always surrounded by brackets to avoid ambiguity. 214 if (can_open_template) 215 ++template_counter; 216 can_open_template = false; 217 Advance(); 218 break; 219 case tok::kw_operator: // C++ operator overloading. 220 if (!ConsumeOperator()) 221 return false; 222 can_open_template = true; 223 break; 224 case tok::raw_identifier: 225 can_open_template = true; 226 Advance(); 227 break; 228 case tok::l_square: 229 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 230 return false; 231 can_open_template = false; 232 break; 233 case tok::l_paren: 234 if (!ConsumeArguments()) 235 return false; 236 can_open_template = false; 237 break; 238 default: 239 can_open_template = false; 240 Advance(); 241 break; 242 } 243 } 244 245 assert(template_counter >= 0); 246 if (template_counter > 0) { 247 return false; 248 } 249 start_position.Remove(); 250 return true; 251} 252 253bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 254 Bookmark start_position = SetBookmark(); 255 if (!ConsumeToken(tok::l_paren)) { 256 return false; 257 } 258 constexpr llvm::StringLiteral g_anonymous("anonymous"); 259 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 260 Peek().getRawIdentifier() == g_anonymous) { 261 Advance(); 262 } else { 263 return false; 264 } 265 266 if (!ConsumeToken(tok::kw_namespace)) { 267 return false; 268 } 269 270 if (!ConsumeToken(tok::r_paren)) { 271 return false; 272 } 273 start_position.Remove(); 274 return true; 275} 276 277bool CPlusPlusNameParser::ConsumeLambda() { 278 Bookmark start_position = SetBookmark(); 279 if (!ConsumeToken(tok::l_brace)) { 280 return false; 281 } 282 constexpr llvm::StringLiteral g_lambda("lambda"); 283 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 284 Peek().getRawIdentifier() == g_lambda) { 285 // Put the matched brace back so we can use ConsumeBrackets 286 TakeBack(); 287 } else { 288 return false; 289 } 290 291 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { 292 return false; 293 } 294 295 start_position.Remove(); 296 return true; 297} 298 299bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 300 tok::TokenKind right) { 301 Bookmark start_position = SetBookmark(); 302 if (!HasMoreTokens() || Peek().getKind() != left) 303 return false; 304 Advance(); 305 306 int counter = 1; 307 while (HasMoreTokens() && counter > 0) { 308 tok::TokenKind kind = Peek().getKind(); 309 if (kind == right) 310 --counter; 311 else if (kind == left) 312 ++counter; 313 Advance(); 314 } 315 316 assert(counter >= 0); 317 if (counter > 0) { 318 return false; 319 } 320 start_position.Remove(); 321 return true; 322} 323 324bool CPlusPlusNameParser::ConsumeOperator() { 325 Bookmark start_position = SetBookmark(); 326 if (!ConsumeToken(tok::kw_operator)) 327 return false; 328 329 if (!HasMoreTokens()) { 330 return false; 331 } 332 333 const auto &token = Peek(); 334 switch (token.getKind()) { 335 case tok::kw_new: 336 case tok::kw_delete: 337 // This is 'new' or 'delete' operators. 338 Advance(); 339 // Check for array new/delete. 340 if (HasMoreTokens() && Peek().is(tok::l_square)) { 341 // Consume the '[' and ']'. 342 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 343 return false; 344 } 345 break; 346 347#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 348 case tok::Token: \ 349 Advance(); \ 350 break; 351#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 352#include "clang/Basic/OperatorKinds.def" 353#undef OVERLOADED_OPERATOR 354#undef OVERLOADED_OPERATOR_MULTI 355 356 case tok::l_paren: 357 // Call operator consume '(' ... ')'. 358 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 359 break; 360 return false; 361 362 case tok::l_square: 363 // This is a [] operator. 364 // Consume the '[' and ']'. 365 if (ConsumeBrackets(tok::l_square, tok::r_square)) 366 break; 367 return false; 368 369 default: 370 // This might be a cast operator. 371 if (ConsumeTypename()) 372 break; 373 return false; 374 } 375 start_position.Remove(); 376 return true; 377} 378 379void CPlusPlusNameParser::SkipTypeQualifiers() { 380 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 381 ; 382} 383 384void CPlusPlusNameParser::SkipFunctionQualifiers() { 385 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 386 ; 387} 388 389bool CPlusPlusNameParser::ConsumeBuiltinType() { 390 bool result = false; 391 bool continue_parsing = true; 392 // Built-in types can be made of a few keywords 393 // like 'unsigned long long int'. This function 394 // consumes all built-in type keywords without 395 // checking if they make sense like 'unsigned char void'. 396 while (continue_parsing && HasMoreTokens()) { 397 switch (Peek().getKind()) { 398 case tok::kw_short: 399 case tok::kw_long: 400 case tok::kw___int64: 401 case tok::kw___int128: 402 case tok::kw_signed: 403 case tok::kw_unsigned: 404 case tok::kw_void: 405 case tok::kw_char: 406 case tok::kw_int: 407 case tok::kw_half: 408 case tok::kw_float: 409 case tok::kw_double: 410 case tok::kw___float128: 411 case tok::kw_wchar_t: 412 case tok::kw_bool: 413 case tok::kw_char16_t: 414 case tok::kw_char32_t: 415 result = true; 416 Advance(); 417 break; 418 default: 419 continue_parsing = false; 420 break; 421 } 422 } 423 return result; 424} 425 426void CPlusPlusNameParser::SkipPtrsAndRefs() { 427 // Ignoring result. 428 ConsumePtrsAndRefs(); 429} 430 431bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 432 bool found = false; 433 SkipTypeQualifiers(); 434 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 435 tok::kw_volatile)) { 436 found = true; 437 SkipTypeQualifiers(); 438 } 439 return found; 440} 441 442bool CPlusPlusNameParser::ConsumeDecltype() { 443 Bookmark start_position = SetBookmark(); 444 if (!ConsumeToken(tok::kw_decltype)) 445 return false; 446 447 if (!ConsumeArguments()) 448 return false; 449 450 start_position.Remove(); 451 return true; 452} 453 454bool CPlusPlusNameParser::ConsumeTypename() { 455 Bookmark start_position = SetBookmark(); 456 SkipTypeQualifiers(); 457 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 458 if (!ParseFullNameImpl()) 459 return false; 460 } 461 SkipPtrsAndRefs(); 462 start_position.Remove(); 463 return true; 464} 465 466Optional<CPlusPlusNameParser::ParsedNameRanges> 467CPlusPlusNameParser::ParseFullNameImpl() { 468 // Name parsing state machine. 469 enum class State { 470 Beginning, // start of the name 471 AfterTwoColons, // right after :: 472 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 473 AfterTemplate, // right after template brackets (<something>) 474 AfterOperator, // right after name of C++ operator 475 }; 476 477 Bookmark start_position = SetBookmark(); 478 State state = State::Beginning; 479 bool continue_parsing = true; 480 Optional<size_t> last_coloncolon_position = None; 481 482 while (continue_parsing && HasMoreTokens()) { 483 const auto &token = Peek(); 484 switch (token.getKind()) { 485 case tok::raw_identifier: // Just a name. 486 if (state != State::Beginning && state != State::AfterTwoColons) { 487 continue_parsing = false; 488 break; 489 } 490 Advance(); 491 state = State::AfterIdentifier; 492 break; 493 case tok::l_paren: { 494 if (state == State::Beginning || state == State::AfterTwoColons) { 495 // (anonymous namespace) 496 if (ConsumeAnonymousNamespace()) { 497 state = State::AfterIdentifier; 498 break; 499 } 500 } 501 502 // Type declared inside a function 'func()::Type' 503 if (state != State::AfterIdentifier && state != State::AfterTemplate && 504 state != State::AfterOperator) { 505 continue_parsing = false; 506 break; 507 } 508 Bookmark l_paren_position = SetBookmark(); 509 // Consume the '(' ... ') [const]'. 510 if (!ConsumeArguments()) { 511 continue_parsing = false; 512 break; 513 } 514 SkipFunctionQualifiers(); 515 516 // Consume '::' 517 size_t coloncolon_position = GetCurrentPosition(); 518 if (!ConsumeToken(tok::coloncolon)) { 519 continue_parsing = false; 520 break; 521 } 522 l_paren_position.Remove(); 523 last_coloncolon_position = coloncolon_position; 524 state = State::AfterTwoColons; 525 break; 526 } 527 case tok::l_brace: 528 if (state == State::Beginning || state == State::AfterTwoColons) { 529 if (ConsumeLambda()) { 530 state = State::AfterIdentifier; 531 break; 532 } 533 } 534 continue_parsing = false; 535 break; 536 case tok::coloncolon: // Type nesting delimiter. 537 if (state != State::Beginning && state != State::AfterIdentifier && 538 state != State::AfterTemplate) { 539 continue_parsing = false; 540 break; 541 } 542 last_coloncolon_position = GetCurrentPosition(); 543 Advance(); 544 state = State::AfterTwoColons; 545 break; 546 case tok::less: // Template brackets. 547 if (state != State::AfterIdentifier && state != State::AfterOperator) { 548 continue_parsing = false; 549 break; 550 } 551 if (!ConsumeTemplateArgs()) { 552 continue_parsing = false; 553 break; 554 } 555 state = State::AfterTemplate; 556 break; 557 case tok::kw_operator: // C++ operator overloading. 558 if (state != State::Beginning && state != State::AfterTwoColons) { 559 continue_parsing = false; 560 break; 561 } 562 if (!ConsumeOperator()) { 563 continue_parsing = false; 564 break; 565 } 566 state = State::AfterOperator; 567 break; 568 case tok::tilde: // Destructor. 569 if (state != State::Beginning && state != State::AfterTwoColons) { 570 continue_parsing = false; 571 break; 572 } 573 Advance(); 574 if (ConsumeToken(tok::raw_identifier)) { 575 state = State::AfterIdentifier; 576 } else { 577 TakeBack(); 578 continue_parsing = false; 579 } 580 break; 581 default: 582 continue_parsing = false; 583 break; 584 } 585 } 586 587 if (state == State::AfterIdentifier || state == State::AfterOperator || 588 state == State::AfterTemplate) { 589 ParsedNameRanges result; 590 if (last_coloncolon_position) { 591 result.context_range = Range(start_position.GetSavedPosition(), 592 last_coloncolon_position.getValue()); 593 result.basename_range = 594 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 595 } else { 596 result.basename_range = 597 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 598 } 599 start_position.Remove(); 600 return result; 601 } else { 602 return None; 603 } 604} 605 606llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 607 if (range.empty()) 608 return llvm::StringRef(); 609 assert(range.begin_index < range.end_index); 610 assert(range.begin_index < m_tokens.size()); 611 assert(range.end_index <= m_tokens.size()); 612 clang::Token &first_token = m_tokens[range.begin_index]; 613 clang::Token &last_token = m_tokens[range.end_index - 1]; 614 clang::SourceLocation start_loc = first_token.getLocation(); 615 clang::SourceLocation end_loc = last_token.getLocation(); 616 unsigned start_pos = start_loc.getRawEncoding(); 617 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 618 return m_text.take_front(end_pos).drop_front(start_pos); 619} 620 621static const clang::LangOptions &GetLangOptions() { 622 static clang::LangOptions g_options; 623 static llvm::once_flag g_once_flag; 624 llvm::call_once(g_once_flag, []() { 625 g_options.LineComment = true; 626 g_options.C99 = true; 627 g_options.C11 = true; 628 g_options.CPlusPlus = true; 629 g_options.CPlusPlus11 = true; 630 g_options.CPlusPlus14 = true; 631 g_options.CPlusPlus17 = true; 632 }); 633 return g_options; 634} 635 636static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 637 static llvm::StringMap<tok::TokenKind> g_map{ 638#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 639#include "clang/Basic/TokenKinds.def" 640#undef KEYWORD 641 }; 642 return g_map; 643} 644 645void CPlusPlusNameParser::ExtractTokens() { 646 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 647 m_text.data(), m_text.data() + m_text.size()); 648 const auto &kw_map = GetKeywordsMap(); 649 clang::Token token; 650 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 651 lexer.LexFromRawLexer(token)) { 652 if (token.is(clang::tok::raw_identifier)) { 653 auto it = kw_map.find(token.getRawIdentifier()); 654 if (it != kw_map.end()) { 655 token.setKind(it->getValue()); 656 } 657 } 658 659 m_tokens.push_back(token); 660 } 661} 662