1//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// 9/// \file 10/// This is the implementation for minimizing header and source files to the 11/// minimum necessary preprocessor directives for evaluating includes. It 12/// reduces the source down to #define, #include, #import, @import, and any 13/// conditional preprocessor logic that contains one of those. 14/// 15//===----------------------------------------------------------------------===// 16 17#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" 18#include "clang/Basic/CharInfo.h" 19#include "clang/Basic/Diagnostic.h" 20#include "clang/Lex/LexDiagnostic.h" 21#include "llvm/ADT/StringMap.h" 22#include "llvm/ADT/StringSwitch.h" 23#include "llvm/Support/MemoryBuffer.h" 24 25using namespace llvm; 26using namespace clang; 27using namespace clang::minimize_source_to_dependency_directives; 28 29namespace { 30 31struct Minimizer { 32 /// Minimized output. 33 SmallVectorImpl<char> &Out; 34 /// The known tokens encountered during the minimization. 35 SmallVectorImpl<Token> &Tokens; 36 37 Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens, 38 StringRef Input, DiagnosticsEngine *Diags, 39 SourceLocation InputSourceLoc) 40 : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), 41 InputSourceLoc(InputSourceLoc) {} 42 43 /// Lex the provided source and emit the minimized output. 44 /// 45 /// \returns True on error. 46 bool minimize(); 47 48private: 49 struct IdInfo { 50 const char *Last; 51 StringRef Name; 52 }; 53 54 /// Lex an identifier. 55 /// 56 /// \pre First points at a valid identifier head. 57 LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); 58 LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, 59 const char *const End); 60 LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); 61 LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); 62 LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); 63 LLVM_NODISCARD bool lexModule(const char *&First, const char *const End); 64 LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); 65 LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); 66 LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); 67 LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, 68 const char *&First, const char *const End); 69 Token &makeToken(TokenKind K) { 70 Tokens.emplace_back(K, Out.size()); 71 return Tokens.back(); 72 } 73 void popToken() { 74 Out.resize(Tokens.back().Offset); 75 Tokens.pop_back(); 76 } 77 TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } 78 79 Minimizer &put(char Byte) { 80 Out.push_back(Byte); 81 return *this; 82 } 83 Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } 84 Minimizer &append(const char *First, const char *Last) { 85 Out.append(First, Last); 86 return *this; 87 } 88 89 void printToNewline(const char *&First, const char *const End); 90 void printAdjacentModuleNameParts(const char *&First, const char *const End); 91 LLVM_NODISCARD bool printAtImportBody(const char *&First, 92 const char *const End); 93 void printDirectiveBody(const char *&First, const char *const End); 94 void printAdjacentMacroArgs(const char *&First, const char *const End); 95 LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); 96 97 /// Reports a diagnostic if the diagnostic engine is provided. Always returns 98 /// true at the end. 99 bool reportError(const char *CurPtr, unsigned Err); 100 101 StringMap<char> SplitIds; 102 StringRef Input; 103 DiagnosticsEngine *Diags; 104 SourceLocation InputSourceLoc; 105}; 106 107} // end anonymous namespace 108 109bool Minimizer::reportError(const char *CurPtr, unsigned Err) { 110 if (!Diags) 111 return true; 112 assert(CurPtr >= Input.data() && "invalid buffer ptr"); 113 Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); 114 return true; 115} 116 117static void skipOverSpaces(const char *&First, const char *const End) { 118 while (First != End && isHorizontalWhitespace(*First)) 119 ++First; 120} 121 122LLVM_NODISCARD static bool isRawStringLiteral(const char *First, 123 const char *Current) { 124 assert(First <= Current); 125 126 // Check if we can even back up. 127 if (*Current != '"' || First == Current) 128 return false; 129 130 // Check for an "R". 131 --Current; 132 if (*Current != 'R') 133 return false; 134 if (First == Current || !isIdentifierBody(*--Current)) 135 return true; 136 137 // Check for a prefix of "u", "U", or "L". 138 if (*Current == 'u' || *Current == 'U' || *Current == 'L') 139 return First == Current || !isIdentifierBody(*--Current); 140 141 // Check for a prefix of "u8". 142 if (*Current != '8' || First == Current || *Current-- != 'u') 143 return false; 144 return First == Current || !isIdentifierBody(*--Current); 145} 146 147static void skipRawString(const char *&First, const char *const End) { 148 assert(First[0] == '"'); 149 assert(First[-1] == 'R'); 150 151 const char *Last = ++First; 152 while (Last != End && *Last != '(') 153 ++Last; 154 if (Last == End) { 155 First = Last; // Hit the end... just give up. 156 return; 157 } 158 159 StringRef Terminator(First, Last - First); 160 for (;;) { 161 // Move First to just past the next ")". 162 First = Last; 163 while (First != End && *First != ')') 164 ++First; 165 if (First == End) 166 return; 167 ++First; 168 169 // Look ahead for the terminator sequence. 170 Last = First; 171 while (Last != End && size_t(Last - First) < Terminator.size() && 172 Terminator[Last - First] == *Last) 173 ++Last; 174 175 // Check if we hit it (or the end of the file). 176 if (Last == End) { 177 First = Last; 178 return; 179 } 180 if (size_t(Last - First) < Terminator.size()) 181 continue; 182 if (*Last != '"') 183 continue; 184 First = Last + 1; 185 return; 186 } 187} 188 189// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 190static unsigned isEOL(const char *First, const char *const End) { 191 if (First == End) 192 return 0; 193 if (End - First > 1 && isVerticalWhitespace(First[0]) && 194 isVerticalWhitespace(First[1]) && First[0] != First[1]) 195 return 2; 196 return !!isVerticalWhitespace(First[0]); 197} 198 199static void skipString(const char *&First, const char *const End) { 200 assert(*First == '\'' || *First == '"' || *First == '<'); 201 const char Terminator = *First == '<' ? '>' : *First; 202 for (++First; First != End && *First != Terminator; ++First) { 203 // String and character literals don't extend past the end of the line. 204 if (isVerticalWhitespace(*First)) 205 return; 206 if (*First != '\\') 207 continue; 208 // Skip past backslash to the next character. This ensures that the 209 // character right after it is skipped as well, which matters if it's 210 // the terminator. 211 if (++First == End) 212 return; 213 if (!isWhitespace(*First)) 214 continue; 215 // Whitespace after the backslash might indicate a line continuation. 216 const char *FirstAfterBackslashPastSpace = First; 217 skipOverSpaces(FirstAfterBackslashPastSpace, End); 218 if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 219 // Advance the character pointer to the next line for the next 220 // iteration. 221 First = FirstAfterBackslashPastSpace + NLSize - 1; 222 } 223 } 224 if (First != End) 225 ++First; // Finish off the string. 226} 227 228// Returns the length of the skipped newline 229static unsigned skipNewline(const char *&First, const char *End) { 230 if (First == End) 231 return 0; 232 assert(isVerticalWhitespace(*First)); 233 unsigned Len = isEOL(First, End); 234 assert(Len && "expected newline"); 235 First += Len; 236 return Len; 237} 238 239static bool wasLineContinuation(const char *First, unsigned EOLLen) { 240 return *(First - (int)EOLLen - 1) == '\\'; 241} 242 243static void skipToNewlineRaw(const char *&First, const char *const End) { 244 for (;;) { 245 if (First == End) 246 return; 247 248 unsigned Len = isEOL(First, End); 249 if (Len) 250 return; 251 252 do { 253 if (++First == End) 254 return; 255 Len = isEOL(First, End); 256 } while (!Len); 257 258 if (First[-1] != '\\') 259 return; 260 261 First += Len; 262 // Keep skipping lines... 263 } 264} 265 266static const char *findLastNonSpace(const char *First, const char *Last) { 267 assert(First <= Last); 268 while (First != Last && isHorizontalWhitespace(Last[-1])) 269 --Last; 270 return Last; 271} 272 273static const char *findFirstTrailingSpace(const char *First, 274 const char *Last) { 275 const char *LastNonSpace = findLastNonSpace(First, Last); 276 if (Last == LastNonSpace) 277 return Last; 278 assert(isHorizontalWhitespace(LastNonSpace[0])); 279 return LastNonSpace + 1; 280} 281 282static void skipLineComment(const char *&First, const char *const End) { 283 assert(First[0] == '/' && First[1] == '/'); 284 First += 2; 285 skipToNewlineRaw(First, End); 286} 287 288static void skipBlockComment(const char *&First, const char *const End) { 289 assert(First[0] == '/' && First[1] == '*'); 290 if (End - First < 4) { 291 First = End; 292 return; 293 } 294 for (First += 3; First != End; ++First) 295 if (First[-1] == '*' && First[0] == '/') { 296 ++First; 297 return; 298 } 299} 300 301/// \returns True if the current single quotation mark character is a C++ 14 302/// digit separator. 303static bool isQuoteCppDigitSeparator(const char *const Start, 304 const char *const Cur, 305 const char *const End) { 306 assert(*Cur == '\'' && "expected quotation character"); 307 // skipLine called in places where we don't expect a valid number 308 // body before `start` on the same line, so always return false at the start. 309 if (Start == Cur) 310 return false; 311 // The previous character must be a valid PP number character. 312 // Make sure that the L, u, U, u8 prefixes don't get marked as a 313 // separator though. 314 char Prev = *(Cur - 1); 315 if (Prev == 'L' || Prev == 'U' || Prev == 'u') 316 return false; 317 if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 318 return false; 319 if (!isPreprocessingNumberBody(Prev)) 320 return false; 321 // The next character should be a valid identifier body character. 322 return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); 323} 324 325static void skipLine(const char *&First, const char *const End) { 326 for (;;) { 327 assert(First <= End); 328 if (First == End) 329 return; 330 331 if (isVerticalWhitespace(*First)) { 332 skipNewline(First, End); 333 return; 334 } 335 const char *Start = First; 336 while (First != End && !isVerticalWhitespace(*First)) { 337 // Iterate over strings correctly to avoid comments and newlines. 338 if (*First == '"' || 339 (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 340 if (isRawStringLiteral(Start, First)) 341 skipRawString(First, End); 342 else 343 skipString(First, End); 344 continue; 345 } 346 347 // Iterate over comments correctly. 348 if (*First != '/' || End - First < 2) { 349 ++First; 350 continue; 351 } 352 353 if (First[1] == '/') { 354 // "//...". 355 skipLineComment(First, End); 356 continue; 357 } 358 359 if (First[1] != '*') { 360 ++First; 361 continue; 362 } 363 364 // "/*...*/". 365 skipBlockComment(First, End); 366 } 367 if (First == End) 368 return; 369 370 // Skip over the newline. 371 unsigned Len = skipNewline(First, End); 372 if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 373 break; 374 } 375} 376 377static void skipDirective(StringRef Name, const char *&First, 378 const char *const End) { 379 if (llvm::StringSwitch<bool>(Name) 380 .Case("warning", true) 381 .Case("error", true) 382 .Default(false)) 383 // Do not process quotes or comments. 384 skipToNewlineRaw(First, End); 385 else 386 skipLine(First, End); 387} 388 389void Minimizer::printToNewline(const char *&First, const char *const End) { 390 while (First != End && !isVerticalWhitespace(*First)) { 391 const char *Last = First; 392 do { 393 // Iterate over strings correctly to avoid comments and newlines. 394 if (*Last == '"' || *Last == '\'' || 395 (*Last == '<' && top() == pp_include)) { 396 if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) 397 skipRawString(Last, End); 398 else 399 skipString(Last, End); 400 continue; 401 } 402 if (*Last != '/' || End - Last < 2) { 403 ++Last; 404 continue; // Gather the rest up to print verbatim. 405 } 406 407 if (Last[1] != '/' && Last[1] != '*') { 408 ++Last; 409 continue; 410 } 411 412 // Deal with "//..." and "/*...*/". 413 append(First, findFirstTrailingSpace(First, Last)); 414 First = Last; 415 416 if (Last[1] == '/') { 417 skipLineComment(First, End); 418 return; 419 } 420 421 put(' '); 422 skipBlockComment(First, End); 423 skipOverSpaces(First, End); 424 Last = First; 425 } while (Last != End && !isVerticalWhitespace(*Last)); 426 427 // Print out the string. 428 const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last); 429 if (Last == End || LastBeforeTrailingSpace == First || 430 LastBeforeTrailingSpace[-1] != '\\') { 431 append(First, LastBeforeTrailingSpace); 432 First = Last; 433 skipNewline(First, End); 434 return; 435 } 436 437 // Print up to the backslash, backing up over spaces. Preserve at least one 438 // space, as the space matters when tokens are separated by a line 439 // continuation. 440 append(First, findFirstTrailingSpace( 441 First, LastBeforeTrailingSpace - 1)); 442 443 First = Last; 444 skipNewline(First, End); 445 skipOverSpaces(First, End); 446 } 447} 448 449static void skipWhitespace(const char *&First, const char *const End) { 450 for (;;) { 451 assert(First <= End); 452 skipOverSpaces(First, End); 453 454 if (End - First < 2) 455 return; 456 457 if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 458 skipNewline(++First, End); 459 continue; 460 } 461 462 // Check for a non-comment character. 463 if (First[0] != '/') 464 return; 465 466 // "// ...". 467 if (First[1] == '/') { 468 skipLineComment(First, End); 469 return; 470 } 471 472 // Cannot be a comment. 473 if (First[1] != '*') 474 return; 475 476 // "/*...*/". 477 skipBlockComment(First, End); 478 } 479} 480 481void Minimizer::printAdjacentModuleNameParts(const char *&First, 482 const char *const End) { 483 // Skip over parts of the body. 484 const char *Last = First; 485 do 486 ++Last; 487 while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); 488 append(First, Last); 489 First = Last; 490} 491 492bool Minimizer::printAtImportBody(const char *&First, const char *const End) { 493 for (;;) { 494 skipWhitespace(First, End); 495 if (First == End) 496 return true; 497 498 if (isVerticalWhitespace(*First)) { 499 skipNewline(First, End); 500 continue; 501 } 502 503 // Found a semicolon. 504 if (*First == ';') { 505 put(*First++).put('\n'); 506 return false; 507 } 508 509 // Don't handle macro expansions inside @import for now. 510 if (!isIdentifierBody(*First) && *First != '.') 511 return true; 512 513 printAdjacentModuleNameParts(First, End); 514 } 515} 516 517void Minimizer::printDirectiveBody(const char *&First, const char *const End) { 518 skipWhitespace(First, End); // Skip initial whitespace. 519 printToNewline(First, End); 520 while (Out.back() == ' ') 521 Out.pop_back(); 522 put('\n'); 523} 524 525LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, 526 const char *const End) { 527 assert(isIdentifierBody(*First) && "invalid identifer"); 528 const char *Last = First + 1; 529 while (Last != End && isIdentifierBody(*Last)) 530 ++Last; 531 return Last; 532} 533 534LLVM_NODISCARD static const char * 535getIdentifierContinuation(const char *First, const char *const End) { 536 if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) 537 return nullptr; 538 539 ++First; 540 skipNewline(First, End); 541 if (First == End) 542 return nullptr; 543 return isIdentifierBody(First[0]) ? First : nullptr; 544} 545 546Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, 547 const char *const End) { 548 const char *Last = lexRawIdentifier(First, End); 549 const char *Next = getIdentifierContinuation(Last, End); 550 if (LLVM_LIKELY(!Next)) 551 return IdInfo{Last, StringRef(First, Last - First)}; 552 553 // Slow path, where identifiers are split over lines. 554 SmallVector<char, 64> Id(First, Last); 555 while (Next) { 556 Last = lexRawIdentifier(Next, End); 557 Id.append(Next, Last); 558 Next = getIdentifierContinuation(Last, End); 559 } 560 return IdInfo{ 561 Last, 562 SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; 563} 564 565void Minimizer::printAdjacentMacroArgs(const char *&First, 566 const char *const End) { 567 // Skip over parts of the body. 568 const char *Last = First; 569 do 570 ++Last; 571 while (Last != End && 572 (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); 573 append(First, Last); 574 First = Last; 575} 576 577bool Minimizer::printMacroArgs(const char *&First, const char *const End) { 578 assert(*First == '('); 579 put(*First++); 580 for (;;) { 581 skipWhitespace(First, End); 582 if (First == End) 583 return true; 584 585 if (*First == ')') { 586 put(*First++); 587 return false; 588 } 589 590 // This is intentionally fairly liberal. 591 if (!(isIdentifierBody(*First) || *First == '.' || *First == ',')) 592 return true; 593 594 printAdjacentMacroArgs(First, End); 595 } 596} 597 598/// Looks for an identifier starting from Last. 599/// 600/// Updates "First" to just past the next identifier, if any. Returns true iff 601/// the identifier matches "Id". 602bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, 603 const char *const End) { 604 skipWhitespace(First, End); 605 if (First == End || !isIdentifierHead(*First)) 606 return false; 607 608 IdInfo FoundId = lexIdentifier(First, End); 609 First = FoundId.Last; 610 return FoundId.Name == Id; 611} 612 613bool Minimizer::lexAt(const char *&First, const char *const End) { 614 // Handle "@import". 615 const char *ImportLoc = First++; 616 if (!isNextIdentifier("import", First, End)) { 617 skipLine(First, End); 618 return false; 619 } 620 makeToken(decl_at_import); 621 append("@import "); 622 if (printAtImportBody(First, End)) 623 return reportError( 624 ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); 625 skipWhitespace(First, End); 626 if (First == End) 627 return false; 628 if (!isVerticalWhitespace(*First)) 629 return reportError( 630 ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); 631 skipNewline(First, End); 632 return false; 633} 634 635bool Minimizer::lexModule(const char *&First, const char *const End) { 636 IdInfo Id = lexIdentifier(First, End); 637 First = Id.Last; 638 bool Export = false; 639 if (Id.Name == "export") { 640 Export = true; 641 skipWhitespace(First, End); 642 if (!isIdentifierBody(*First)) { 643 skipLine(First, End); 644 return false; 645 } 646 Id = lexIdentifier(First, End); 647 First = Id.Last; 648 } 649 650 if (Id.Name != "module" && Id.Name != "import") { 651 skipLine(First, End); 652 return false; 653 } 654 655 skipWhitespace(First, End); 656 657 // Ignore this as a module directive if the next character can't be part of 658 // an import. 659 660 switch (*First) { 661 case ':': 662 case '<': 663 case '"': 664 break; 665 default: 666 if (!isIdentifierBody(*First)) { 667 skipLine(First, End); 668 return false; 669 } 670 } 671 672 if (Export) { 673 makeToken(cxx_export_decl); 674 append("export "); 675 } 676 677 if (Id.Name == "module") 678 makeToken(cxx_module_decl); 679 else 680 makeToken(cxx_import_decl); 681 append(Id.Name); 682 append(" "); 683 printToNewline(First, End); 684 append("\n"); 685 return false; 686} 687 688bool Minimizer::lexDefine(const char *&First, const char *const End) { 689 makeToken(pp_define); 690 append("#define "); 691 skipWhitespace(First, End); 692 693 if (!isIdentifierHead(*First)) 694 return reportError(First, diag::err_pp_macro_not_identifier); 695 696 IdInfo Id = lexIdentifier(First, End); 697 const char *Last = Id.Last; 698 append(Id.Name); 699 if (Last == End) 700 return false; 701 if (*Last == '(') { 702 size_t Size = Out.size(); 703 if (printMacroArgs(Last, End)) { 704 // Be robust to bad macro arguments, since they can show up in disabled 705 // code. 706 Out.resize(Size); 707 append("(/* invalid */\n"); 708 skipLine(Last, End); 709 return false; 710 } 711 } 712 skipWhitespace(Last, End); 713 if (Last == End) 714 return false; 715 if (!isVerticalWhitespace(*Last)) 716 put(' '); 717 printDirectiveBody(Last, End); 718 First = Last; 719 return false; 720} 721 722bool Minimizer::lexPragma(const char *&First, const char *const End) { 723 // #pragma. 724 skipWhitespace(First, End); 725 if (First == End || !isIdentifierHead(*First)) 726 return false; 727 728 IdInfo FoundId = lexIdentifier(First, End); 729 First = FoundId.Last; 730 if (FoundId.Name == "once") { 731 // #pragma once 732 skipLine(First, End); 733 makeToken(pp_pragma_once); 734 append("#pragma once\n"); 735 return false; 736 } 737 738 if (FoundId.Name != "clang") { 739 skipLine(First, End); 740 return false; 741 } 742 743 // #pragma clang. 744 if (!isNextIdentifier("module", First, End)) { 745 skipLine(First, End); 746 return false; 747 } 748 749 // #pragma clang module. 750 if (!isNextIdentifier("import", First, End)) { 751 skipLine(First, End); 752 return false; 753 } 754 755 // #pragma clang module import. 756 makeToken(pp_pragma_import); 757 append("#pragma clang module import "); 758 printDirectiveBody(First, End); 759 return false; 760} 761 762bool Minimizer::lexEndif(const char *&First, const char *const End) { 763 // Strip out "#else" if it's empty. 764 if (top() == pp_else) 765 popToken(); 766 767 // If "#ifdef" is empty, strip it and skip the "#endif". 768 // 769 // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 770 // we can skip empty `#if` and `#elif` blocks as well after scanning for a 771 // literal __has_include in the condition. Even without that rule we could 772 // drop the tokens if we scan for identifiers in the condition and find none. 773 if (top() == pp_ifdef || top() == pp_ifndef) { 774 popToken(); 775 skipLine(First, End); 776 return false; 777 } 778 779 return lexDefault(pp_endif, "endif", First, End); 780} 781 782bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive, 783 const char *&First, const char *const End) { 784 makeToken(Kind); 785 put('#').append(Directive).put(' '); 786 printDirectiveBody(First, End); 787 return false; 788} 789 790static bool isStartOfRelevantLine(char First) { 791 switch (First) { 792 case '#': 793 case '@': 794 case 'i': 795 case 'e': 796 case 'm': 797 return true; 798 } 799 return false; 800} 801 802bool Minimizer::lexPPLine(const char *&First, const char *const End) { 803 assert(First != End); 804 805 skipWhitespace(First, End); 806 assert(First <= End); 807 if (First == End) 808 return false; 809 810 if (!isStartOfRelevantLine(*First)) { 811 skipLine(First, End); 812 assert(First <= End); 813 return false; 814 } 815 816 // Handle "@import". 817 if (*First == '@') 818 return lexAt(First, End); 819 820 if (*First == 'i' || *First == 'e' || *First == 'm') 821 return lexModule(First, End); 822 823 // Handle preprocessing directives. 824 ++First; // Skip over '#'. 825 skipWhitespace(First, End); 826 827 if (First == End) 828 return reportError(First, diag::err_pp_expected_eol); 829 830 if (!isIdentifierHead(*First)) { 831 skipLine(First, End); 832 return false; 833 } 834 835 // Figure out the token. 836 IdInfo Id = lexIdentifier(First, End); 837 First = Id.Last; 838 auto Kind = llvm::StringSwitch<TokenKind>(Id.Name) 839 .Case("include", pp_include) 840 .Case("__include_macros", pp___include_macros) 841 .Case("define", pp_define) 842 .Case("undef", pp_undef) 843 .Case("import", pp_import) 844 .Case("include_next", pp_include_next) 845 .Case("if", pp_if) 846 .Case("ifdef", pp_ifdef) 847 .Case("ifndef", pp_ifndef) 848 .Case("elif", pp_elif) 849 .Case("else", pp_else) 850 .Case("endif", pp_endif) 851 .Case("pragma", pp_pragma_import) 852 .Default(pp_none); 853 if (Kind == pp_none) { 854 skipDirective(Id.Name, First, End); 855 return false; 856 } 857 858 if (Kind == pp_endif) 859 return lexEndif(First, End); 860 861 if (Kind == pp_define) 862 return lexDefine(First, End); 863 864 if (Kind == pp_pragma_import) 865 return lexPragma(First, End); 866 867 // Everything else. 868 return lexDefault(Kind, Id.Name, First, End); 869} 870 871static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 872 if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 873 First[2] == '\xbf') 874 First += 3; 875} 876 877bool Minimizer::minimizeImpl(const char *First, const char *const End) { 878 skipUTF8ByteOrderMark(First, End); 879 while (First != End) 880 if (lexPPLine(First, End)) 881 return true; 882 return false; 883} 884 885bool Minimizer::minimize() { 886 bool Error = minimizeImpl(Input.begin(), Input.end()); 887 888 if (!Error) { 889 // Add a trailing newline and an EOF on success. 890 if (!Out.empty() && Out.back() != '\n') 891 Out.push_back('\n'); 892 makeToken(pp_eof); 893 } 894 895 // Null-terminate the output. This way the memory buffer that's passed to 896 // Clang will not have to worry about the terminating '\0'. 897 Out.push_back(0); 898 Out.pop_back(); 899 return Error; 900} 901 902bool clang::minimize_source_to_dependency_directives::computeSkippedRanges( 903 ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) { 904 struct Directive { 905 enum DirectiveKind { 906 If, // if/ifdef/ifndef 907 Else // elif,else 908 }; 909 int Offset; 910 DirectiveKind Kind; 911 }; 912 llvm::SmallVector<Directive, 32> Offsets; 913 for (const Token &T : Input) { 914 switch (T.K) { 915 case pp_if: 916 case pp_ifdef: 917 case pp_ifndef: 918 Offsets.push_back({T.Offset, Directive::If}); 919 break; 920 921 case pp_elif: 922 case pp_else: { 923 if (Offsets.empty()) 924 return true; 925 int PreviousOffset = Offsets.back().Offset; 926 Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); 927 Offsets.push_back({T.Offset, Directive::Else}); 928 break; 929 } 930 931 case pp_endif: { 932 if (Offsets.empty()) 933 return true; 934 int PreviousOffset = Offsets.back().Offset; 935 Range.push_back({PreviousOffset, T.Offset - PreviousOffset}); 936 do { 937 Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind; 938 if (Kind == Directive::If) 939 break; 940 } while (!Offsets.empty()); 941 break; 942 } 943 default: 944 break; 945 } 946 } 947 return false; 948} 949 950bool clang::minimizeSourceToDependencyDirectives( 951 StringRef Input, SmallVectorImpl<char> &Output, 952 SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags, 953 SourceLocation InputSourceLoc) { 954 Output.clear(); 955 Tokens.clear(); 956 return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize(); 957} 958