CPlusPlusNameParser.cpp revision 317032
1//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "CPlusPlusNameParser.h" 11 12#include "clang/Basic/IdentifierTable.h" 13#include "llvm/ADT/StringMap.h" 14#include "llvm/Support/Threading.h" 15 16using namespace lldb; 17using namespace lldb_private; 18using llvm::Optional; 19using llvm::None; 20using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; 21using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; 22namespace tok = clang::tok; 23 24Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { 25 m_next_token_index = 0; 26 Optional<ParsedFunction> result(None); 27 28 // Try to parse the name as function without a return type specified 29 // e.g. main(int, char*[]) 30 { 31 Bookmark start_position = SetBookmark(); 32 result = ParseFunctionImpl(false); 33 if (result && !HasMoreTokens()) 34 return result; 35 } 36 37 // Try to parse the name as function with function pointer return type 38 // e.g. void (*get_func(const char*))() 39 result = ParseFuncPtr(true); 40 if (result) 41 return result; 42 43 // Finally try to parse the name as a function with non-function return type 44 // e.g. int main(int, char*[]) 45 result = ParseFunctionImpl(true); 46 if (HasMoreTokens()) 47 return None; 48 return result; 49} 50 51Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { 52 m_next_token_index = 0; 53 Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); 54 if (!name_ranges) 55 return None; 56 if (HasMoreTokens()) 57 return None; 58 ParsedName result; 59 result.basename = GetTextForRange(name_ranges.getValue().basename_range); 60 result.context = GetTextForRange(name_ranges.getValue().context_range); 61 return result; 62} 63 64bool CPlusPlusNameParser::HasMoreTokens() { 65 return m_next_token_index < m_tokens.size(); 66} 67 68void CPlusPlusNameParser::Advance() { ++m_next_token_index; } 69 70void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } 71 72bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { 73 if (!HasMoreTokens()) 74 return false; 75 76 if (!Peek().is(kind)) 77 return false; 78 79 Advance(); 80 return true; 81} 82 83template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { 84 if (!HasMoreTokens()) 85 return false; 86 87 if (!Peek().isOneOf(kinds...)) 88 return false; 89 90 Advance(); 91 return true; 92} 93 94CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { 95 return Bookmark(m_next_token_index); 96} 97 98size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } 99 100clang::Token &CPlusPlusNameParser::Peek() { 101 assert(HasMoreTokens()); 102 return m_tokens[m_next_token_index]; 103} 104 105Optional<ParsedFunction> 106CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { 107 Bookmark start_position = SetBookmark(); 108 if (expect_return_type) { 109 // Consume return type if it's expected. 110 if (!ConsumeTypename()) 111 return None; 112 } 113 114 auto maybe_name = ParseFullNameImpl(); 115 if (!maybe_name) { 116 return None; 117 } 118 119 size_t argument_start = GetCurrentPosition(); 120 if (!ConsumeArguments()) { 121 return None; 122 } 123 124 size_t qualifiers_start = GetCurrentPosition(); 125 SkipFunctionQualifiers(); 126 size_t end_position = GetCurrentPosition(); 127 128 ParsedFunction result; 129 result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); 130 result.name.context = GetTextForRange(maybe_name.getValue().context_range); 131 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); 132 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); 133 start_position.Remove(); 134 return result; 135} 136 137Optional<ParsedFunction> 138CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { 139 Bookmark start_position = SetBookmark(); 140 if (expect_return_type) { 141 // Consume return type. 142 if (!ConsumeTypename()) 143 return None; 144 } 145 146 if (!ConsumeToken(tok::l_paren)) 147 return None; 148 if (!ConsumePtrsAndRefs()) 149 return None; 150 151 { 152 Bookmark before_inner_function_pos = SetBookmark(); 153 auto maybe_inner_function_name = ParseFunctionImpl(false); 154 if (maybe_inner_function_name) 155 if (ConsumeToken(tok::r_paren)) 156 if (ConsumeArguments()) { 157 SkipFunctionQualifiers(); 158 start_position.Remove(); 159 before_inner_function_pos.Remove(); 160 return maybe_inner_function_name; 161 } 162 } 163 164 auto maybe_inner_function_ptr_name = ParseFuncPtr(false); 165 if (maybe_inner_function_ptr_name) 166 if (ConsumeToken(tok::r_paren)) 167 if (ConsumeArguments()) { 168 SkipFunctionQualifiers(); 169 start_position.Remove(); 170 return maybe_inner_function_ptr_name; 171 } 172 return None; 173} 174 175bool CPlusPlusNameParser::ConsumeArguments() { 176 return ConsumeBrackets(tok::l_paren, tok::r_paren); 177} 178 179bool CPlusPlusNameParser::ConsumeTemplateArgs() { 180 Bookmark start_position = SetBookmark(); 181 if (!HasMoreTokens() || Peek().getKind() != tok::less) 182 return false; 183 Advance(); 184 185 // Consuming template arguments is a bit trickier than consuming function 186 // arguments, because '<' '>' brackets are not always trivially balanced. 187 // In some rare cases tokens '<' and '>' can appear inside template arguments 188 // as arithmetic or shift operators not as template brackets. 189 // Examples: std::enable_if<(10u)<(64), bool> 190 // f<A<operator<(X,Y)::Subclass>> 191 // Good thing that compiler makes sure that really ambiguous cases of 192 // '>' usage should be enclosed within '()' brackets. 193 int template_counter = 1; 194 bool can_open_template = false; 195 while (HasMoreTokens() && template_counter > 0) { 196 tok::TokenKind kind = Peek().getKind(); 197 switch (kind) { 198 case tok::greatergreater: 199 template_counter -= 2; 200 can_open_template = false; 201 Advance(); 202 break; 203 case tok::greater: 204 --template_counter; 205 can_open_template = false; 206 Advance(); 207 break; 208 case tok::less: 209 // '<' is an attempt to open a subteamplte 210 // check if parser is at the point where it's actually possible, 211 // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. 212 // No need to do the same for '>' because compiler actually makes sure 213 // that '>' always surrounded by brackets to avoid ambiguity. 214 if (can_open_template) 215 ++template_counter; 216 can_open_template = false; 217 Advance(); 218 break; 219 case tok::kw_operator: // C++ operator overloading. 220 if (!ConsumeOperator()) 221 return false; 222 can_open_template = true; 223 break; 224 case tok::raw_identifier: 225 can_open_template = true; 226 Advance(); 227 break; 228 case tok::l_square: 229 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 230 return false; 231 can_open_template = false; 232 break; 233 case tok::l_paren: 234 if (!ConsumeArguments()) 235 return false; 236 can_open_template = false; 237 break; 238 default: 239 can_open_template = false; 240 Advance(); 241 break; 242 } 243 } 244 245 assert(template_counter >= 0); 246 if (template_counter > 0) { 247 return false; 248 } 249 start_position.Remove(); 250 return true; 251} 252 253bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { 254 Bookmark start_position = SetBookmark(); 255 if (!ConsumeToken(tok::l_paren)) { 256 return false; 257 } 258 constexpr llvm::StringLiteral g_anonymous("anonymous"); 259 if (HasMoreTokens() && Peek().is(tok::raw_identifier) && 260 Peek().getRawIdentifier() == g_anonymous) { 261 Advance(); 262 } else { 263 return false; 264 } 265 266 if (!ConsumeToken(tok::kw_namespace)) { 267 return false; 268 } 269 270 if (!ConsumeToken(tok::r_paren)) { 271 return false; 272 } 273 start_position.Remove(); 274 return true; 275} 276 277bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, 278 tok::TokenKind right) { 279 Bookmark start_position = SetBookmark(); 280 if (!HasMoreTokens() || Peek().getKind() != left) 281 return false; 282 Advance(); 283 284 int counter = 1; 285 while (HasMoreTokens() && counter > 0) { 286 tok::TokenKind kind = Peek().getKind(); 287 if (kind == right) 288 --counter; 289 else if (kind == left) 290 ++counter; 291 Advance(); 292 } 293 294 assert(counter >= 0); 295 if (counter > 0) { 296 return false; 297 } 298 start_position.Remove(); 299 return true; 300} 301 302bool CPlusPlusNameParser::ConsumeOperator() { 303 Bookmark start_position = SetBookmark(); 304 if (!ConsumeToken(tok::kw_operator)) 305 return false; 306 307 if (!HasMoreTokens()) { 308 return false; 309 } 310 311 const auto &token = Peek(); 312 switch (token.getKind()) { 313 case tok::kw_new: 314 case tok::kw_delete: 315 // This is 'new' or 'delete' operators. 316 Advance(); 317 // Check for array new/delete. 318 if (HasMoreTokens() && Peek().is(tok::l_square)) { 319 // Consume the '[' and ']'. 320 if (!ConsumeBrackets(tok::l_square, tok::r_square)) 321 return false; 322 } 323 break; 324 325#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ 326 case tok::Token: \ 327 Advance(); \ 328 break; 329#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) 330#include "clang/Basic/OperatorKinds.def" 331#undef OVERLOADED_OPERATOR 332#undef OVERLOADED_OPERATOR_MULTI 333 334 case tok::l_paren: 335 // Call operator consume '(' ... ')'. 336 if (ConsumeBrackets(tok::l_paren, tok::r_paren)) 337 break; 338 return false; 339 340 case tok::l_square: 341 // This is a [] operator. 342 // Consume the '[' and ']'. 343 if (ConsumeBrackets(tok::l_square, tok::r_square)) 344 break; 345 return false; 346 347 default: 348 // This might be a cast operator. 349 if (ConsumeTypename()) 350 break; 351 return false; 352 } 353 start_position.Remove(); 354 return true; 355} 356 357void CPlusPlusNameParser::SkipTypeQualifiers() { 358 while (ConsumeToken(tok::kw_const, tok::kw_volatile)) 359 ; 360} 361 362void CPlusPlusNameParser::SkipFunctionQualifiers() { 363 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) 364 ; 365} 366 367bool CPlusPlusNameParser::ConsumeBuiltinType() { 368 bool result = false; 369 bool continue_parsing = true; 370 // Built-in types can be made of a few keywords 371 // like 'unsigned long long int'. This function 372 // consumes all built-in type keywords without 373 // checking if they make sense like 'unsigned char void'. 374 while (continue_parsing && HasMoreTokens()) { 375 switch (Peek().getKind()) { 376 case tok::kw_short: 377 case tok::kw_long: 378 case tok::kw___int64: 379 case tok::kw___int128: 380 case tok::kw_signed: 381 case tok::kw_unsigned: 382 case tok::kw_void: 383 case tok::kw_char: 384 case tok::kw_int: 385 case tok::kw_half: 386 case tok::kw_float: 387 case tok::kw_double: 388 case tok::kw___float128: 389 case tok::kw_wchar_t: 390 case tok::kw_bool: 391 case tok::kw_char16_t: 392 case tok::kw_char32_t: 393 result = true; 394 Advance(); 395 break; 396 default: 397 continue_parsing = false; 398 break; 399 } 400 } 401 return result; 402} 403 404void CPlusPlusNameParser::SkipPtrsAndRefs() { 405 // Ignoring result. 406 ConsumePtrsAndRefs(); 407} 408 409bool CPlusPlusNameParser::ConsumePtrsAndRefs() { 410 bool found = false; 411 SkipTypeQualifiers(); 412 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, 413 tok::kw_volatile)) { 414 found = true; 415 SkipTypeQualifiers(); 416 } 417 return found; 418} 419 420bool CPlusPlusNameParser::ConsumeDecltype() { 421 Bookmark start_position = SetBookmark(); 422 if (!ConsumeToken(tok::kw_decltype)) 423 return false; 424 425 if (!ConsumeArguments()) 426 return false; 427 428 start_position.Remove(); 429 return true; 430} 431 432bool CPlusPlusNameParser::ConsumeTypename() { 433 Bookmark start_position = SetBookmark(); 434 SkipTypeQualifiers(); 435 if (!ConsumeBuiltinType() && !ConsumeDecltype()) { 436 if (!ParseFullNameImpl()) 437 return false; 438 } 439 SkipPtrsAndRefs(); 440 start_position.Remove(); 441 return true; 442} 443 444Optional<CPlusPlusNameParser::ParsedNameRanges> 445CPlusPlusNameParser::ParseFullNameImpl() { 446 // Name parsing state machine. 447 enum class State { 448 Beginning, // start of the name 449 AfterTwoColons, // right after :: 450 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) 451 AfterTemplate, // right after template brackets (<something>) 452 AfterOperator, // right after name of C++ operator 453 }; 454 455 Bookmark start_position = SetBookmark(); 456 State state = State::Beginning; 457 bool continue_parsing = true; 458 Optional<size_t> last_coloncolon_position = None; 459 460 while (continue_parsing && HasMoreTokens()) { 461 const auto &token = Peek(); 462 switch (token.getKind()) { 463 case tok::raw_identifier: // Just a name. 464 if (state != State::Beginning && state != State::AfterTwoColons) { 465 continue_parsing = false; 466 break; 467 } 468 Advance(); 469 state = State::AfterIdentifier; 470 break; 471 case tok::l_paren: { 472 if (state == State::Beginning || state == State::AfterTwoColons) { 473 // (anonymous namespace) 474 if (ConsumeAnonymousNamespace()) { 475 state = State::AfterIdentifier; 476 break; 477 } 478 } 479 480 // Type declared inside a function 'func()::Type' 481 if (state != State::AfterIdentifier && state != State::AfterTemplate && 482 state != State::AfterOperator) { 483 continue_parsing = false; 484 break; 485 } 486 Bookmark l_paren_position = SetBookmark(); 487 // Consume the '(' ... ') [const]'. 488 if (!ConsumeArguments()) { 489 continue_parsing = false; 490 break; 491 } 492 SkipFunctionQualifiers(); 493 494 // Consume '::' 495 size_t coloncolon_position = GetCurrentPosition(); 496 if (!ConsumeToken(tok::coloncolon)) { 497 continue_parsing = false; 498 break; 499 } 500 l_paren_position.Remove(); 501 last_coloncolon_position = coloncolon_position; 502 state = State::AfterTwoColons; 503 break; 504 } 505 case tok::coloncolon: // Type nesting delimiter. 506 if (state != State::Beginning && state != State::AfterIdentifier && 507 state != State::AfterTemplate) { 508 continue_parsing = false; 509 break; 510 } 511 last_coloncolon_position = GetCurrentPosition(); 512 Advance(); 513 state = State::AfterTwoColons; 514 break; 515 case tok::less: // Template brackets. 516 if (state != State::AfterIdentifier && state != State::AfterOperator) { 517 continue_parsing = false; 518 break; 519 } 520 if (!ConsumeTemplateArgs()) { 521 continue_parsing = false; 522 break; 523 } 524 state = State::AfterTemplate; 525 break; 526 case tok::kw_operator: // C++ operator overloading. 527 if (state != State::Beginning && state != State::AfterTwoColons) { 528 continue_parsing = false; 529 break; 530 } 531 if (!ConsumeOperator()) { 532 continue_parsing = false; 533 break; 534 } 535 state = State::AfterOperator; 536 break; 537 case tok::tilde: // Destructor. 538 if (state != State::Beginning && state != State::AfterTwoColons) { 539 continue_parsing = false; 540 break; 541 } 542 Advance(); 543 if (ConsumeToken(tok::raw_identifier)) { 544 state = State::AfterIdentifier; 545 } else { 546 TakeBack(); 547 continue_parsing = false; 548 } 549 break; 550 default: 551 continue_parsing = false; 552 break; 553 } 554 } 555 556 if (state == State::AfterIdentifier || state == State::AfterOperator || 557 state == State::AfterTemplate) { 558 ParsedNameRanges result; 559 if (last_coloncolon_position) { 560 result.context_range = Range(start_position.GetSavedPosition(), 561 last_coloncolon_position.getValue()); 562 result.basename_range = 563 Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition()); 564 } else { 565 result.basename_range = 566 Range(start_position.GetSavedPosition(), GetCurrentPosition()); 567 } 568 start_position.Remove(); 569 return result; 570 } else { 571 return None; 572 } 573} 574 575llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { 576 if (range.empty()) 577 return llvm::StringRef(); 578 assert(range.begin_index < range.end_index); 579 assert(range.begin_index < m_tokens.size()); 580 assert(range.end_index <= m_tokens.size()); 581 clang::Token &first_token = m_tokens[range.begin_index]; 582 clang::Token &last_token = m_tokens[range.end_index - 1]; 583 clang::SourceLocation start_loc = first_token.getLocation(); 584 clang::SourceLocation end_loc = last_token.getLocation(); 585 unsigned start_pos = start_loc.getRawEncoding(); 586 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); 587 return m_text.take_front(end_pos).drop_front(start_pos); 588} 589 590static const clang::LangOptions &GetLangOptions() { 591 static clang::LangOptions g_options; 592 static llvm::once_flag g_once_flag; 593 llvm::call_once(g_once_flag, []() { 594 g_options.LineComment = true; 595 g_options.C99 = true; 596 g_options.C11 = true; 597 g_options.CPlusPlus = true; 598 g_options.CPlusPlus11 = true; 599 g_options.CPlusPlus14 = true; 600 g_options.CPlusPlus1z = true; 601 }); 602 return g_options; 603} 604 605static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { 606 static llvm::StringMap<tok::TokenKind> g_map{ 607#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, 608#include "clang/Basic/TokenKinds.def" 609#undef KEYWORD 610 }; 611 return g_map; 612} 613 614void CPlusPlusNameParser::ExtractTokens() { 615 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), 616 m_text.data(), m_text.data() + m_text.size()); 617 const auto &kw_map = GetKeywordsMap(); 618 clang::Token token; 619 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); 620 lexer.LexFromRawLexer(token)) { 621 if (token.is(clang::tok::raw_identifier)) { 622 auto it = kw_map.find(token.getRawIdentifier()); 623 if (it != kw_map.end()) { 624 token.setKind(it->getValue()); 625 } 626 } 627 628 m_tokens.push_back(token); 629 } 630} 631