1336815Sdim//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
2336815Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6336815Sdim//
7336815Sdim//===----------------------------------------------------------------------===//
8336815Sdim
9336815Sdim#include "clang/Tooling/Inclusions/HeaderIncludes.h"
10336815Sdim#include "clang/Basic/SourceManager.h"
11336815Sdim#include "clang/Lex/Lexer.h"
12353358Sdim#include "llvm/ADT/Optional.h"
13344779Sdim#include "llvm/Support/FormatVariadic.h"
14336815Sdim
15336815Sdimnamespace clang {
16336815Sdimnamespace tooling {
17336815Sdimnamespace {
18336815Sdim
19336815SdimLangOptions createLangOpts() {
20336815Sdim  LangOptions LangOpts;
21336815Sdim  LangOpts.CPlusPlus = 1;
22336815Sdim  LangOpts.CPlusPlus11 = 1;
23336815Sdim  LangOpts.CPlusPlus14 = 1;
24336815Sdim  LangOpts.LineComment = 1;
25336815Sdim  LangOpts.CXXOperatorNames = 1;
26336815Sdim  LangOpts.Bool = 1;
27344779Sdim  LangOpts.ObjC = 1;
28336815Sdim  LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
29336815Sdim  LangOpts.DeclSpecKeyword = 1; // To get __declspec.
30336815Sdim  LangOpts.WChar = 1;           // To get wchar_t
31336815Sdim  return LangOpts;
32336815Sdim}
33336815Sdim
34336815Sdim// Returns the offset after skipping a sequence of tokens, matched by \p
35336815Sdim// GetOffsetAfterSequence, from the start of the code.
36336815Sdim// \p GetOffsetAfterSequence should be a function that matches a sequence of
37336815Sdim// tokens and returns an offset after the sequence.
38336815Sdimunsigned getOffsetAfterTokenSequence(
39336815Sdim    StringRef FileName, StringRef Code, const IncludeStyle &Style,
40336815Sdim    llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
41336815Sdim        GetOffsetAfterSequence) {
42336815Sdim  SourceManagerForFile VirtualSM(FileName, Code);
43336815Sdim  SourceManager &SM = VirtualSM.get();
44336815Sdim  Lexer Lex(SM.getMainFileID(), SM.getBuffer(SM.getMainFileID()), SM,
45336815Sdim            createLangOpts());
46336815Sdim  Token Tok;
47336815Sdim  // Get the first token.
48336815Sdim  Lex.LexFromRawLexer(Tok);
49336815Sdim  return GetOffsetAfterSequence(SM, Lex, Tok);
50336815Sdim}
51336815Sdim
52336815Sdim// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
53336815Sdim// \p Tok will be the token after this directive; otherwise, it can be any token
54353358Sdim// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
55353358Sdim// (second) raw_identifier name is checked.
56353358Sdimbool checkAndConsumeDirectiveWithName(
57353358Sdim    Lexer &Lex, StringRef Name, Token &Tok,
58353358Sdim    llvm::Optional<StringRef> RawIDName = llvm::None) {
59336815Sdim  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
60336815Sdim                 Tok.is(tok::raw_identifier) &&
61336815Sdim                 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
62353358Sdim                 Tok.is(tok::raw_identifier) &&
63353358Sdim                 (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
64336815Sdim  if (Matched)
65336815Sdim    Lex.LexFromRawLexer(Tok);
66336815Sdim  return Matched;
67336815Sdim}
68336815Sdim
69336815Sdimvoid skipComments(Lexer &Lex, Token &Tok) {
70336815Sdim  while (Tok.is(tok::comment))
71336815Sdim    if (Lex.LexFromRawLexer(Tok))
72336815Sdim      return;
73336815Sdim}
74336815Sdim
75336815Sdim// Returns the offset after header guard directives and any comments
76353358Sdim// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
77353358Sdim// header guard is present in the code, this will return the offset after
78353358Sdim// skipping all comments from the start of the code.
79336815Sdimunsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
80336815Sdim                                               StringRef Code,
81336815Sdim                                               const IncludeStyle &Style) {
82353358Sdim  // \p Consume returns location after header guard or 0 if no header guard is
83353358Sdim  // found.
84353358Sdim  auto ConsumeHeaderGuardAndComment =
85353358Sdim      [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
86353358Sdim                                 Token Tok)>
87353358Sdim              Consume) {
88353358Sdim        return getOffsetAfterTokenSequence(
89353358Sdim            FileName, Code, Style,
90353358Sdim            [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
91353358Sdim              skipComments(Lex, Tok);
92353358Sdim              unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
93353358Sdim              return std::max(InitialOffset, Consume(SM, Lex, Tok));
94353358Sdim            });
95353358Sdim      };
96353358Sdim  return std::max(
97353358Sdim      // #ifndef/#define
98353358Sdim      ConsumeHeaderGuardAndComment(
99353358Sdim          [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
100353358Sdim            if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
101353358Sdim              skipComments(Lex, Tok);
102353358Sdim              if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
103353358Sdim                return SM.getFileOffset(Tok.getLocation());
104353358Sdim            }
105353358Sdim            return 0;
106353358Sdim          }),
107353358Sdim      // #pragma once
108353358Sdim      ConsumeHeaderGuardAndComment(
109353358Sdim          [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
110353358Sdim            if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
111353358Sdim                                                 StringRef("once")))
112353358Sdim              return SM.getFileOffset(Tok.getLocation());
113353358Sdim            return 0;
114353358Sdim          }));
115336815Sdim}
116336815Sdim
117336815Sdim// Check if a sequence of tokens is like
118336815Sdim//    "#include ("header.h" | <header.h>)".
119336815Sdim// If it is, \p Tok will be the token after this directive; otherwise, it can be
120336815Sdim// any token after the given \p Tok (including \p Tok).
121336815Sdimbool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
122336815Sdim  auto Matched = [&]() {
123336815Sdim    Lex.LexFromRawLexer(Tok);
124336815Sdim    return true;
125336815Sdim  };
126336815Sdim  if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
127336815Sdim      Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
128336815Sdim    if (Lex.LexFromRawLexer(Tok))
129336815Sdim      return false;
130336815Sdim    if (Tok.is(tok::string_literal))
131336815Sdim      return Matched();
132336815Sdim    if (Tok.is(tok::less)) {
133336815Sdim      while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
134336815Sdim      }
135336815Sdim      if (Tok.is(tok::greater))
136336815Sdim        return Matched();
137336815Sdim    }
138336815Sdim  }
139336815Sdim  return false;
140336815Sdim}
141336815Sdim
142336815Sdim// Returns the offset of the last #include directive after which a new
143336815Sdim// #include can be inserted. This ignores #include's after the #include block(s)
144336815Sdim// in the beginning of a file to avoid inserting headers into code sections
145336815Sdim// where new #include's should not be added by default.
146336815Sdim// These code sections include:
147336815Sdim//      - raw string literals (containing #include).
148336815Sdim//      - #if blocks.
149336815Sdim//      - Special #include's among declarations (e.g. functions).
150336815Sdim//
151336815Sdim// If no #include after which a new #include can be inserted, this returns the
152336815Sdim// offset after skipping all comments from the start of the code.
153336815Sdim// Inserting after an #include is not allowed if it comes after code that is not
154336815Sdim// #include (e.g. pre-processing directive that is not #include, declarations).
155336815Sdimunsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
156336815Sdim                                     const IncludeStyle &Style) {
157336815Sdim  return getOffsetAfterTokenSequence(
158336815Sdim      FileName, Code, Style,
159336815Sdim      [](const SourceManager &SM, Lexer &Lex, Token Tok) {
160336815Sdim        skipComments(Lex, Tok);
161336815Sdim        unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
162336815Sdim        while (checkAndConsumeInclusiveDirective(Lex, Tok))
163336815Sdim          MaxOffset = SM.getFileOffset(Tok.getLocation());
164336815Sdim        return MaxOffset;
165336815Sdim      });
166336815Sdim}
167336815Sdim
168336815Sdiminline StringRef trimInclude(StringRef IncludeName) {
169336815Sdim  return IncludeName.trim("\"<>");
170336815Sdim}
171336815Sdim
172336815Sdimconst char IncludeRegexPattern[] =
173336815Sdim    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
174336815Sdim
175336815Sdim} // anonymous namespace
176336815Sdim
177336815SdimIncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
178336815Sdim                                               StringRef FileName)
179336815Sdim    : Style(Style), FileName(FileName) {
180336815Sdim  FileStem = llvm::sys::path::stem(FileName);
181336815Sdim  for (const auto &Category : Style.IncludeCategories)
182336815Sdim    CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase);
183336815Sdim  IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
184336815Sdim               FileName.endswith(".cpp") || FileName.endswith(".c++") ||
185336815Sdim               FileName.endswith(".cxx") || FileName.endswith(".m") ||
186336815Sdim               FileName.endswith(".mm");
187360784Sdim  if (!Style.IncludeIsMainSourceRegex.empty()) {
188360784Sdim    llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
189360784Sdim    IsMainFile |= MainFileRegex.match(FileName);
190360784Sdim  }
191336815Sdim}
192336815Sdim
193336815Sdimint IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
194336815Sdim                                               bool CheckMainHeader) const {
195336815Sdim  int Ret = INT_MAX;
196336815Sdim  for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
197336815Sdim    if (CategoryRegexs[i].match(IncludeName)) {
198336815Sdim      Ret = Style.IncludeCategories[i].Priority;
199336815Sdim      break;
200336815Sdim    }
201336815Sdim  if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
202336815Sdim    Ret = 0;
203336815Sdim  return Ret;
204336815Sdim}
205336815Sdim
206360784Sdimint IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
207360784Sdim                                                   bool CheckMainHeader) const {
208360784Sdim  int Ret = INT_MAX;
209360784Sdim  for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
210360784Sdim    if (CategoryRegexs[i].match(IncludeName)) {
211360784Sdim      Ret = Style.IncludeCategories[i].SortPriority;
212360784Sdim      if (Ret == 0)
213360784Sdim        Ret = Style.IncludeCategories[i].Priority;
214360784Sdim      break;
215360784Sdim    }
216360784Sdim  if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
217360784Sdim    Ret = 0;
218360784Sdim  return Ret;
219360784Sdim}
220336815Sdimbool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
221336815Sdim  if (!IncludeName.startswith("\""))
222336815Sdim    return false;
223336815Sdim  StringRef HeaderStem =
224336815Sdim      llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
225336815Sdim  if (FileStem.startswith(HeaderStem) ||
226336815Sdim      FileStem.startswith_lower(HeaderStem)) {
227344779Sdim    llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
228336815Sdim                                 llvm::Regex::IgnoreCase);
229336815Sdim    if (MainIncludeRegex.match(FileStem))
230336815Sdim      return true;
231336815Sdim  }
232336815Sdim  return false;
233336815Sdim}
234336815Sdim
235336815SdimHeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
236336815Sdim                               const IncludeStyle &Style)
237336815Sdim    : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
238336815Sdim      MinInsertOffset(
239336815Sdim          getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
240336815Sdim      MaxInsertOffset(MinInsertOffset +
241336815Sdim                      getMaxHeaderInsertionOffset(
242336815Sdim                          FileName, Code.drop_front(MinInsertOffset), Style)),
243336815Sdim      Categories(Style, FileName),
244336815Sdim      IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
245336815Sdim  // Add 0 for main header and INT_MAX for headers that are not in any
246336815Sdim  // category.
247336815Sdim  Priorities = {0, INT_MAX};
248336815Sdim  for (const auto &Category : Style.IncludeCategories)
249336815Sdim    Priorities.insert(Category.Priority);
250336815Sdim  SmallVector<StringRef, 32> Lines;
251336815Sdim  Code.drop_front(MinInsertOffset).split(Lines, "\n");
252336815Sdim
253336815Sdim  unsigned Offset = MinInsertOffset;
254336815Sdim  unsigned NextLineOffset;
255336815Sdim  SmallVector<StringRef, 4> Matches;
256336815Sdim  for (auto Line : Lines) {
257336815Sdim    NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
258336815Sdim    if (IncludeRegex.match(Line, &Matches)) {
259336815Sdim      // If this is the last line without trailing newline, we need to make
260336815Sdim      // sure we don't delete across the file boundary.
261336815Sdim      addExistingInclude(
262336815Sdim          Include(Matches[2],
263336815Sdim                  tooling::Range(
264336815Sdim                      Offset, std::min(Line.size() + 1, Code.size() - Offset))),
265336815Sdim          NextLineOffset);
266336815Sdim    }
267336815Sdim    Offset = NextLineOffset;
268336815Sdim  }
269336815Sdim
270336815Sdim  // Populate CategoryEndOfssets:
271336815Sdim  // - Ensure that CategoryEndOffset[Highest] is always populated.
272336815Sdim  // - If CategoryEndOffset[Priority] isn't set, use the next higher value
273336815Sdim  // that is set, up to CategoryEndOffset[Highest].
274336815Sdim  auto Highest = Priorities.begin();
275336815Sdim  if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
276336815Sdim    if (FirstIncludeOffset >= 0)
277336815Sdim      CategoryEndOffsets[*Highest] = FirstIncludeOffset;
278336815Sdim    else
279336815Sdim      CategoryEndOffsets[*Highest] = MinInsertOffset;
280336815Sdim  }
281336815Sdim  // By this point, CategoryEndOffset[Highest] is always set appropriately:
282336815Sdim  //  - to an appropriate location before/after existing #includes, or
283336815Sdim  //  - to right after the header guard, or
284336815Sdim  //  - to the beginning of the file.
285336815Sdim  for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
286336815Sdim    if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
287336815Sdim      CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
288336815Sdim}
289336815Sdim
290336815Sdim// \p Offset: the start of the line following this include directive.
291336815Sdimvoid HeaderIncludes::addExistingInclude(Include IncludeToAdd,
292336815Sdim                                        unsigned NextLineOffset) {
293336815Sdim  auto Iter =
294336815Sdim      ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
295336815Sdim  Iter->second.push_back(std::move(IncludeToAdd));
296336815Sdim  auto &CurInclude = Iter->second.back();
297336815Sdim  // The header name with quotes or angle brackets.
298336815Sdim  // Only record the offset of current #include if we can insert after it.
299336815Sdim  if (CurInclude.R.getOffset() <= MaxInsertOffset) {
300336815Sdim    int Priority = Categories.getIncludePriority(
301336815Sdim        CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
302336815Sdim    CategoryEndOffsets[Priority] = NextLineOffset;
303336815Sdim    IncludesByPriority[Priority].push_back(&CurInclude);
304336815Sdim    if (FirstIncludeOffset < 0)
305336815Sdim      FirstIncludeOffset = CurInclude.R.getOffset();
306336815Sdim  }
307336815Sdim}
308336815Sdim
309336815Sdimllvm::Optional<tooling::Replacement>
310336815SdimHeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const {
311336815Sdim  assert(IncludeName == trimInclude(IncludeName));
312336815Sdim  // If a <header> ("header") already exists in code, "header" (<header>) with
313336815Sdim  // different quotation will still be inserted.
314336815Sdim  // FIXME: figure out if this is the best behavior.
315336815Sdim  auto It = ExistingIncludes.find(IncludeName);
316336815Sdim  if (It != ExistingIncludes.end())
317336815Sdim    for (const auto &Inc : It->second)
318336815Sdim      if ((IsAngled && StringRef(Inc.Name).startswith("<")) ||
319336815Sdim          (!IsAngled && StringRef(Inc.Name).startswith("\"")))
320336815Sdim        return llvm::None;
321344779Sdim  std::string Quoted =
322344779Sdim      llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName);
323336815Sdim  StringRef QuotedName = Quoted;
324336815Sdim  int Priority = Categories.getIncludePriority(
325336815Sdim      QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
326336815Sdim  auto CatOffset = CategoryEndOffsets.find(Priority);
327336815Sdim  assert(CatOffset != CategoryEndOffsets.end());
328336815Sdim  unsigned InsertOffset = CatOffset->second; // Fall back offset
329336815Sdim  auto Iter = IncludesByPriority.find(Priority);
330336815Sdim  if (Iter != IncludesByPriority.end()) {
331336815Sdim    for (const auto *Inc : Iter->second) {
332336815Sdim      if (QuotedName < Inc->Name) {
333336815Sdim        InsertOffset = Inc->R.getOffset();
334336815Sdim        break;
335336815Sdim      }
336336815Sdim    }
337336815Sdim  }
338336815Sdim  assert(InsertOffset <= Code.size());
339344779Sdim  std::string NewInclude = llvm::formatv("#include {0}\n", QuotedName);
340336815Sdim  // When inserting headers at end of the code, also append '\n' to the code
341336815Sdim  // if it does not end with '\n'.
342336815Sdim  // FIXME: when inserting multiple #includes at the end of code, only one
343336815Sdim  // newline should be added.
344336815Sdim  if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
345336815Sdim    NewInclude = "\n" + NewInclude;
346336815Sdim  return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
347336815Sdim}
348336815Sdim
349336815Sdimtooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
350336815Sdim                                             bool IsAngled) const {
351336815Sdim  assert(IncludeName == trimInclude(IncludeName));
352336815Sdim  tooling::Replacements Result;
353336815Sdim  auto Iter = ExistingIncludes.find(IncludeName);
354336815Sdim  if (Iter == ExistingIncludes.end())
355336815Sdim    return Result;
356336815Sdim  for (const auto &Inc : Iter->second) {
357336815Sdim    if ((IsAngled && StringRef(Inc.Name).startswith("\"")) ||
358336815Sdim        (!IsAngled && StringRef(Inc.Name).startswith("<")))
359336815Sdim      continue;
360336815Sdim    llvm::Error Err = Result.add(tooling::Replacement(
361336815Sdim        FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
362336815Sdim    if (Err) {
363336815Sdim      auto ErrMsg = "Unexpected conflicts in #include deletions: " +
364336815Sdim                    llvm::toString(std::move(Err));
365336815Sdim      llvm_unreachable(ErrMsg.c_str());
366336815Sdim    }
367336815Sdim  }
368336815Sdim  return Result;
369336815Sdim}
370336815Sdim
371336815Sdim} // namespace tooling
372336815Sdim} // namespace clang
373