1336815Sdim//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===// 2336815Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6336815Sdim// 7336815Sdim//===----------------------------------------------------------------------===// 8336815Sdim 9336815Sdim#include "clang/Tooling/Inclusions/HeaderIncludes.h" 10336815Sdim#include "clang/Basic/SourceManager.h" 11336815Sdim#include "clang/Lex/Lexer.h" 12353358Sdim#include "llvm/ADT/Optional.h" 13344779Sdim#include "llvm/Support/FormatVariadic.h" 14336815Sdim 15336815Sdimnamespace clang { 16336815Sdimnamespace tooling { 17336815Sdimnamespace { 18336815Sdim 19336815SdimLangOptions createLangOpts() { 20336815Sdim LangOptions LangOpts; 21336815Sdim LangOpts.CPlusPlus = 1; 22336815Sdim LangOpts.CPlusPlus11 = 1; 23336815Sdim LangOpts.CPlusPlus14 = 1; 24336815Sdim LangOpts.LineComment = 1; 25336815Sdim LangOpts.CXXOperatorNames = 1; 26336815Sdim LangOpts.Bool = 1; 27344779Sdim LangOpts.ObjC = 1; 28336815Sdim LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 29336815Sdim LangOpts.DeclSpecKeyword = 1; // To get __declspec. 30336815Sdim LangOpts.WChar = 1; // To get wchar_t 31336815Sdim return LangOpts; 32336815Sdim} 33336815Sdim 34336815Sdim// Returns the offset after skipping a sequence of tokens, matched by \p 35336815Sdim// GetOffsetAfterSequence, from the start of the code. 36336815Sdim// \p GetOffsetAfterSequence should be a function that matches a sequence of 37336815Sdim// tokens and returns an offset after the sequence. 38336815Sdimunsigned getOffsetAfterTokenSequence( 39336815Sdim StringRef FileName, StringRef Code, const IncludeStyle &Style, 40336815Sdim llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> 41336815Sdim GetOffsetAfterSequence) { 42336815Sdim SourceManagerForFile VirtualSM(FileName, Code); 43336815Sdim SourceManager &SM = VirtualSM.get(); 44336815Sdim Lexer Lex(SM.getMainFileID(), SM.getBuffer(SM.getMainFileID()), SM, 45336815Sdim createLangOpts()); 46336815Sdim Token Tok; 47336815Sdim // Get the first token. 48336815Sdim Lex.LexFromRawLexer(Tok); 49336815Sdim return GetOffsetAfterSequence(SM, Lex, Tok); 50336815Sdim} 51336815Sdim 52336815Sdim// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, 53336815Sdim// \p Tok will be the token after this directive; otherwise, it can be any token 54353358Sdim// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the 55353358Sdim// (second) raw_identifier name is checked. 56353358Sdimbool checkAndConsumeDirectiveWithName( 57353358Sdim Lexer &Lex, StringRef Name, Token &Tok, 58353358Sdim llvm::Optional<StringRef> RawIDName = llvm::None) { 59336815Sdim bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && 60336815Sdim Tok.is(tok::raw_identifier) && 61336815Sdim Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && 62353358Sdim Tok.is(tok::raw_identifier) && 63353358Sdim (!RawIDName || Tok.getRawIdentifier() == *RawIDName); 64336815Sdim if (Matched) 65336815Sdim Lex.LexFromRawLexer(Tok); 66336815Sdim return Matched; 67336815Sdim} 68336815Sdim 69336815Sdimvoid skipComments(Lexer &Lex, Token &Tok) { 70336815Sdim while (Tok.is(tok::comment)) 71336815Sdim if (Lex.LexFromRawLexer(Tok)) 72336815Sdim return; 73336815Sdim} 74336815Sdim 75336815Sdim// Returns the offset after header guard directives and any comments 76353358Sdim// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no 77353358Sdim// header guard is present in the code, this will return the offset after 78353358Sdim// skipping all comments from the start of the code. 79336815Sdimunsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, 80336815Sdim StringRef Code, 81336815Sdim const IncludeStyle &Style) { 82353358Sdim // \p Consume returns location after header guard or 0 if no header guard is 83353358Sdim // found. 84353358Sdim auto ConsumeHeaderGuardAndComment = 85353358Sdim [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, 86353358Sdim Token Tok)> 87353358Sdim Consume) { 88353358Sdim return getOffsetAfterTokenSequence( 89353358Sdim FileName, Code, Style, 90353358Sdim [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { 91353358Sdim skipComments(Lex, Tok); 92353358Sdim unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); 93353358Sdim return std::max(InitialOffset, Consume(SM, Lex, Tok)); 94353358Sdim }); 95353358Sdim }; 96353358Sdim return std::max( 97353358Sdim // #ifndef/#define 98353358Sdim ConsumeHeaderGuardAndComment( 99353358Sdim [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { 100353358Sdim if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { 101353358Sdim skipComments(Lex, Tok); 102353358Sdim if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) 103353358Sdim return SM.getFileOffset(Tok.getLocation()); 104353358Sdim } 105353358Sdim return 0; 106353358Sdim }), 107353358Sdim // #pragma once 108353358Sdim ConsumeHeaderGuardAndComment( 109353358Sdim [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { 110353358Sdim if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok, 111353358Sdim StringRef("once"))) 112353358Sdim return SM.getFileOffset(Tok.getLocation()); 113353358Sdim return 0; 114353358Sdim })); 115336815Sdim} 116336815Sdim 117336815Sdim// Check if a sequence of tokens is like 118336815Sdim// "#include ("header.h" | <header.h>)". 119336815Sdim// If it is, \p Tok will be the token after this directive; otherwise, it can be 120336815Sdim// any token after the given \p Tok (including \p Tok). 121336815Sdimbool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { 122336815Sdim auto Matched = [&]() { 123336815Sdim Lex.LexFromRawLexer(Tok); 124336815Sdim return true; 125336815Sdim }; 126336815Sdim if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && 127336815Sdim Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { 128336815Sdim if (Lex.LexFromRawLexer(Tok)) 129336815Sdim return false; 130336815Sdim if (Tok.is(tok::string_literal)) 131336815Sdim return Matched(); 132336815Sdim if (Tok.is(tok::less)) { 133336815Sdim while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { 134336815Sdim } 135336815Sdim if (Tok.is(tok::greater)) 136336815Sdim return Matched(); 137336815Sdim } 138336815Sdim } 139336815Sdim return false; 140336815Sdim} 141336815Sdim 142336815Sdim// Returns the offset of the last #include directive after which a new 143336815Sdim// #include can be inserted. This ignores #include's after the #include block(s) 144336815Sdim// in the beginning of a file to avoid inserting headers into code sections 145336815Sdim// where new #include's should not be added by default. 146336815Sdim// These code sections include: 147336815Sdim// - raw string literals (containing #include). 148336815Sdim// - #if blocks. 149336815Sdim// - Special #include's among declarations (e.g. functions). 150336815Sdim// 151336815Sdim// If no #include after which a new #include can be inserted, this returns the 152336815Sdim// offset after skipping all comments from the start of the code. 153336815Sdim// Inserting after an #include is not allowed if it comes after code that is not 154336815Sdim// #include (e.g. pre-processing directive that is not #include, declarations). 155336815Sdimunsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, 156336815Sdim const IncludeStyle &Style) { 157336815Sdim return getOffsetAfterTokenSequence( 158336815Sdim FileName, Code, Style, 159336815Sdim [](const SourceManager &SM, Lexer &Lex, Token Tok) { 160336815Sdim skipComments(Lex, Tok); 161336815Sdim unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); 162336815Sdim while (checkAndConsumeInclusiveDirective(Lex, Tok)) 163336815Sdim MaxOffset = SM.getFileOffset(Tok.getLocation()); 164336815Sdim return MaxOffset; 165336815Sdim }); 166336815Sdim} 167336815Sdim 168336815Sdiminline StringRef trimInclude(StringRef IncludeName) { 169336815Sdim return IncludeName.trim("\"<>"); 170336815Sdim} 171336815Sdim 172336815Sdimconst char IncludeRegexPattern[] = 173336815Sdim R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; 174336815Sdim 175336815Sdim} // anonymous namespace 176336815Sdim 177336815SdimIncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style, 178336815Sdim StringRef FileName) 179336815Sdim : Style(Style), FileName(FileName) { 180336815Sdim FileStem = llvm::sys::path::stem(FileName); 181336815Sdim for (const auto &Category : Style.IncludeCategories) 182336815Sdim CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase); 183336815Sdim IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") || 184336815Sdim FileName.endswith(".cpp") || FileName.endswith(".c++") || 185336815Sdim FileName.endswith(".cxx") || FileName.endswith(".m") || 186336815Sdim FileName.endswith(".mm"); 187360784Sdim if (!Style.IncludeIsMainSourceRegex.empty()) { 188360784Sdim llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex); 189360784Sdim IsMainFile |= MainFileRegex.match(FileName); 190360784Sdim } 191336815Sdim} 192336815Sdim 193336815Sdimint IncludeCategoryManager::getIncludePriority(StringRef IncludeName, 194336815Sdim bool CheckMainHeader) const { 195336815Sdim int Ret = INT_MAX; 196336815Sdim for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) 197336815Sdim if (CategoryRegexs[i].match(IncludeName)) { 198336815Sdim Ret = Style.IncludeCategories[i].Priority; 199336815Sdim break; 200336815Sdim } 201336815Sdim if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) 202336815Sdim Ret = 0; 203336815Sdim return Ret; 204336815Sdim} 205336815Sdim 206360784Sdimint IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, 207360784Sdim bool CheckMainHeader) const { 208360784Sdim int Ret = INT_MAX; 209360784Sdim for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) 210360784Sdim if (CategoryRegexs[i].match(IncludeName)) { 211360784Sdim Ret = Style.IncludeCategories[i].SortPriority; 212360784Sdim if (Ret == 0) 213360784Sdim Ret = Style.IncludeCategories[i].Priority; 214360784Sdim break; 215360784Sdim } 216360784Sdim if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) 217360784Sdim Ret = 0; 218360784Sdim return Ret; 219360784Sdim} 220336815Sdimbool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { 221336815Sdim if (!IncludeName.startswith("\"")) 222336815Sdim return false; 223336815Sdim StringRef HeaderStem = 224336815Sdim llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1)); 225336815Sdim if (FileStem.startswith(HeaderStem) || 226336815Sdim FileStem.startswith_lower(HeaderStem)) { 227344779Sdim llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, 228336815Sdim llvm::Regex::IgnoreCase); 229336815Sdim if (MainIncludeRegex.match(FileStem)) 230336815Sdim return true; 231336815Sdim } 232336815Sdim return false; 233336815Sdim} 234336815Sdim 235336815SdimHeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, 236336815Sdim const IncludeStyle &Style) 237336815Sdim : FileName(FileName), Code(Code), FirstIncludeOffset(-1), 238336815Sdim MinInsertOffset( 239336815Sdim getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), 240336815Sdim MaxInsertOffset(MinInsertOffset + 241336815Sdim getMaxHeaderInsertionOffset( 242336815Sdim FileName, Code.drop_front(MinInsertOffset), Style)), 243336815Sdim Categories(Style, FileName), 244336815Sdim IncludeRegex(llvm::Regex(IncludeRegexPattern)) { 245336815Sdim // Add 0 for main header and INT_MAX for headers that are not in any 246336815Sdim // category. 247336815Sdim Priorities = {0, INT_MAX}; 248336815Sdim for (const auto &Category : Style.IncludeCategories) 249336815Sdim Priorities.insert(Category.Priority); 250336815Sdim SmallVector<StringRef, 32> Lines; 251336815Sdim Code.drop_front(MinInsertOffset).split(Lines, "\n"); 252336815Sdim 253336815Sdim unsigned Offset = MinInsertOffset; 254336815Sdim unsigned NextLineOffset; 255336815Sdim SmallVector<StringRef, 4> Matches; 256336815Sdim for (auto Line : Lines) { 257336815Sdim NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); 258336815Sdim if (IncludeRegex.match(Line, &Matches)) { 259336815Sdim // If this is the last line without trailing newline, we need to make 260336815Sdim // sure we don't delete across the file boundary. 261336815Sdim addExistingInclude( 262336815Sdim Include(Matches[2], 263336815Sdim tooling::Range( 264336815Sdim Offset, std::min(Line.size() + 1, Code.size() - Offset))), 265336815Sdim NextLineOffset); 266336815Sdim } 267336815Sdim Offset = NextLineOffset; 268336815Sdim } 269336815Sdim 270336815Sdim // Populate CategoryEndOfssets: 271336815Sdim // - Ensure that CategoryEndOffset[Highest] is always populated. 272336815Sdim // - If CategoryEndOffset[Priority] isn't set, use the next higher value 273336815Sdim // that is set, up to CategoryEndOffset[Highest]. 274336815Sdim auto Highest = Priorities.begin(); 275336815Sdim if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { 276336815Sdim if (FirstIncludeOffset >= 0) 277336815Sdim CategoryEndOffsets[*Highest] = FirstIncludeOffset; 278336815Sdim else 279336815Sdim CategoryEndOffsets[*Highest] = MinInsertOffset; 280336815Sdim } 281336815Sdim // By this point, CategoryEndOffset[Highest] is always set appropriately: 282336815Sdim // - to an appropriate location before/after existing #includes, or 283336815Sdim // - to right after the header guard, or 284336815Sdim // - to the beginning of the file. 285336815Sdim for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) 286336815Sdim if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) 287336815Sdim CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; 288336815Sdim} 289336815Sdim 290336815Sdim// \p Offset: the start of the line following this include directive. 291336815Sdimvoid HeaderIncludes::addExistingInclude(Include IncludeToAdd, 292336815Sdim unsigned NextLineOffset) { 293336815Sdim auto Iter = 294336815Sdim ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first; 295336815Sdim Iter->second.push_back(std::move(IncludeToAdd)); 296336815Sdim auto &CurInclude = Iter->second.back(); 297336815Sdim // The header name with quotes or angle brackets. 298336815Sdim // Only record the offset of current #include if we can insert after it. 299336815Sdim if (CurInclude.R.getOffset() <= MaxInsertOffset) { 300336815Sdim int Priority = Categories.getIncludePriority( 301336815Sdim CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0); 302336815Sdim CategoryEndOffsets[Priority] = NextLineOffset; 303336815Sdim IncludesByPriority[Priority].push_back(&CurInclude); 304336815Sdim if (FirstIncludeOffset < 0) 305336815Sdim FirstIncludeOffset = CurInclude.R.getOffset(); 306336815Sdim } 307336815Sdim} 308336815Sdim 309336815Sdimllvm::Optional<tooling::Replacement> 310336815SdimHeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const { 311336815Sdim assert(IncludeName == trimInclude(IncludeName)); 312336815Sdim // If a <header> ("header") already exists in code, "header" (<header>) with 313336815Sdim // different quotation will still be inserted. 314336815Sdim // FIXME: figure out if this is the best behavior. 315336815Sdim auto It = ExistingIncludes.find(IncludeName); 316336815Sdim if (It != ExistingIncludes.end()) 317336815Sdim for (const auto &Inc : It->second) 318336815Sdim if ((IsAngled && StringRef(Inc.Name).startswith("<")) || 319336815Sdim (!IsAngled && StringRef(Inc.Name).startswith("\""))) 320336815Sdim return llvm::None; 321344779Sdim std::string Quoted = 322344779Sdim llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName); 323336815Sdim StringRef QuotedName = Quoted; 324336815Sdim int Priority = Categories.getIncludePriority( 325336815Sdim QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0); 326336815Sdim auto CatOffset = CategoryEndOffsets.find(Priority); 327336815Sdim assert(CatOffset != CategoryEndOffsets.end()); 328336815Sdim unsigned InsertOffset = CatOffset->second; // Fall back offset 329336815Sdim auto Iter = IncludesByPriority.find(Priority); 330336815Sdim if (Iter != IncludesByPriority.end()) { 331336815Sdim for (const auto *Inc : Iter->second) { 332336815Sdim if (QuotedName < Inc->Name) { 333336815Sdim InsertOffset = Inc->R.getOffset(); 334336815Sdim break; 335336815Sdim } 336336815Sdim } 337336815Sdim } 338336815Sdim assert(InsertOffset <= Code.size()); 339344779Sdim std::string NewInclude = llvm::formatv("#include {0}\n", QuotedName); 340336815Sdim // When inserting headers at end of the code, also append '\n' to the code 341336815Sdim // if it does not end with '\n'. 342336815Sdim // FIXME: when inserting multiple #includes at the end of code, only one 343336815Sdim // newline should be added. 344336815Sdim if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) 345336815Sdim NewInclude = "\n" + NewInclude; 346336815Sdim return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); 347336815Sdim} 348336815Sdim 349336815Sdimtooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName, 350336815Sdim bool IsAngled) const { 351336815Sdim assert(IncludeName == trimInclude(IncludeName)); 352336815Sdim tooling::Replacements Result; 353336815Sdim auto Iter = ExistingIncludes.find(IncludeName); 354336815Sdim if (Iter == ExistingIncludes.end()) 355336815Sdim return Result; 356336815Sdim for (const auto &Inc : Iter->second) { 357336815Sdim if ((IsAngled && StringRef(Inc.Name).startswith("\"")) || 358336815Sdim (!IsAngled && StringRef(Inc.Name).startswith("<"))) 359336815Sdim continue; 360336815Sdim llvm::Error Err = Result.add(tooling::Replacement( 361336815Sdim FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); 362336815Sdim if (Err) { 363336815Sdim auto ErrMsg = "Unexpected conflicts in #include deletions: " + 364336815Sdim llvm::toString(std::move(Err)); 365336815Sdim llvm_unreachable(ErrMsg.c_str()); 366336815Sdim } 367336815Sdim } 368336815Sdim return Result; 369336815Sdim} 370336815Sdim 371336815Sdim} // namespace tooling 372336815Sdim} // namespace clang 373