//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// WhitespaceManager class manages whitespace around tokens and their
/// replacements.
///
//===----------------------------------------------------------------------===//

15280031Sdim#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
16280031Sdim#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17251609Sdim
18251609Sdim#include "TokenAnnotator.h"
19251609Sdim#include "clang/Basic/SourceManager.h"
20251609Sdim#include "clang/Format/Format.h"
21251609Sdim#include <string>
22251609Sdim
23251609Sdimnamespace clang {
24251609Sdimnamespace format {
25251609Sdim
26341825Sdim/// Manages the whitespaces around tokens and their replacements.
27251609Sdim///
28251609Sdim/// This includes special handling for certain constructs, e.g. the alignment of
29251609Sdim/// trailing line comments.
30261991Sdim///
31261991Sdim/// To guarantee correctness of alignment operations, the \c WhitespaceManager
32261991Sdim/// must be informed about every token in the source file; for each token, there
33261991Sdim/// must be exactly one call to either \c replaceWhitespace or
34261991Sdim/// \c addUntouchableToken.
35261991Sdim///
36261991Sdim/// There may be multiple calls to \c breakToken for a given token.
37251609Sdimclass WhitespaceManager {
38251609Sdimpublic:
39309124Sdim  WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
40261991Sdim                    bool UseCRLF)
41261991Sdim      : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
42251609Sdim
43353358Sdim  bool useCRLF() const { return UseCRLF; }
44353358Sdim
45341825Sdim  /// Replaces the whitespace in front of \p Tok. Only call once for
46251609Sdim  /// each \c AnnotatedToken.
47321369Sdim  ///
48321369Sdim  /// \p StartOfTokenColumn is the column at which the token will start after
49321369Sdim  /// this replacement. It is needed for determining how \p Spaces is turned
50321369Sdim  /// into tabs and spaces for some format styles.
51321369Sdim  void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
52261991Sdim                         unsigned StartOfTokenColumn,
53261991Sdim                         bool InPPDirective = false);
54251609Sdim
55341825Sdim  /// Adds information about an unchangeable token's whitespace.
56251609Sdim  ///
57261991Sdim  /// Needs to be called for every token for which \c replaceWhitespace
58261991Sdim  /// was not called.
59261991Sdim  void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
60251609Sdim
61327952Sdim  llvm::Error addReplacement(const tooling::Replacement &Replacement);
62327952Sdim
63341825Sdim  /// Inserts or replaces whitespace in the middle of a token.
64251609Sdim  ///
65261991Sdim  /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
66261991Sdim  /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
67261991Sdim  /// characters.
68251609Sdim  ///
69276479Sdim  /// Note: \p Spaces can be negative to retain information about initial
70276479Sdim  /// relative column offset between a line of a block comment and the start of
71276479Sdim  /// the comment. This negative offset may be compensated by trailing comment
72276479Sdim  /// alignment here. In all other cases negative \p Spaces will be truncated to
73276479Sdim  /// 0.
74276479Sdim  ///
75261991Sdim  /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
76261991Sdim  /// used to align backslashes correctly.
77261991Sdim  void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
78261991Sdim                                unsigned ReplaceChars,
79261991Sdim                                StringRef PreviousPostfix,
80261991Sdim                                StringRef CurrentPrefix, bool InPPDirective,
81321369Sdim                                unsigned Newlines, int Spaces);
82251609Sdim
83341825Sdim  /// Returns all the \c Replacements created during formatting.
84251609Sdim  const tooling::Replacements &generateReplacements();
85251609Sdim
86341825Sdim  /// Represents a change before a token, a break inside a token,
87261991Sdim  /// or the layout of an unchanged token (or whitespace within).
88261991Sdim  struct Change {
89341825Sdim    /// Functor to sort changes in original source order.
90261991Sdim    class IsBeforeInFile {
91261991Sdim    public:
92261991Sdim      IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
93261991Sdim      bool operator()(const Change &C1, const Change &C2) const;
94251609Sdim
95261991Sdim    private:
96261991Sdim      const SourceManager &SourceMgr;
97261991Sdim    };
98251609Sdim
99341825Sdim    /// Creates a \c Change.
100261991Sdim    ///
101261991Sdim    /// The generated \c Change will replace the characters at
102261991Sdim    /// \p OriginalWhitespaceRange with a concatenation of
103261991Sdim    /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
104261991Sdim    /// and \p CurrentLinePrefix.
105261991Sdim    ///
106261991Sdim    /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
107261991Sdim    /// trailing comments and escaped newlines.
108321369Sdim    Change(const FormatToken &Tok, bool CreateReplacement,
109321369Sdim           SourceRange OriginalWhitespaceRange, int Spaces,
110321369Sdim           unsigned StartOfTokenColumn, unsigned NewlinesBefore,
111321369Sdim           StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
112321369Sdim           bool ContinuesPPDirective, bool IsInsideToken);
113251609Sdim
114321369Sdim    // The kind of the token whose whitespace this change replaces, or in which
115321369Sdim    // this change inserts whitespace.
116321369Sdim    // FIXME: Currently this is not set correctly for breaks inside comments, as
117321369Sdim    // the \c BreakableToken is still doing its own alignment.
118321369Sdim    const FormatToken *Tok;
119321369Sdim
120261991Sdim    bool CreateReplacement;
121261991Sdim    // Changes might be in the middle of a token, so we cannot just keep the
122261991Sdim    // FormatToken around to query its information.
123261991Sdim    SourceRange OriginalWhitespaceRange;
124261991Sdim    unsigned StartOfTokenColumn;
125261991Sdim    unsigned NewlinesBefore;
126261991Sdim    std::string PreviousLinePostfix;
127261991Sdim    std::string CurrentLinePrefix;
128261991Sdim    bool ContinuesPPDirective;
129251609Sdim
130261991Sdim    // The number of spaces in front of the token or broken part of the token.
131261991Sdim    // This will be adapted when aligning tokens.
132276479Sdim    // Can be negative to retain information about the initial relative offset
133276479Sdim    // of the lines in a block comment. This is used when aligning trailing
134276479Sdim    // comments. Uncompensated negative offset is truncated to 0.
135276479Sdim    int Spaces;
136261991Sdim
137296417Sdim    // If this change is inside of a token but not at the start of the token or
138296417Sdim    // directly after a newline.
139296417Sdim    bool IsInsideToken;
140296417Sdim
141261991Sdim    // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
142261991Sdim    // \c EscapedNewlineColumn will be calculated in
143261991Sdim    // \c calculateLineBreakInformation.
144261991Sdim    bool IsTrailingComment;
145261991Sdim    unsigned TokenLength;
146261991Sdim    unsigned PreviousEndOfTokenColumn;
147261991Sdim    unsigned EscapedNewlineColumn;
148276479Sdim
149276479Sdim    // These fields are used to retain correct relative line indentation in a
150276479Sdim    // block comment when aligning trailing comments.
151276479Sdim    //
152276479Sdim    // If this Change represents a continuation of a block comment,
153276479Sdim    // \c StartOfBlockComment is pointer to the first Change in the block
154276479Sdim    // comment. \c IndentationOffset is a relative column offset to this
155276479Sdim    // change, so that the correct column can be reconstructed at the end of
156276479Sdim    // the alignment process.
157276479Sdim    const Change *StartOfBlockComment;
158276479Sdim    int IndentationOffset;
159321369Sdim
160323112Sdim    // A combination of indent level and nesting level, which are used in
161321369Sdim    // tandem to compute lexical scope, for the purposes of deciding
162321369Sdim    // when to stop consecutive alignment runs.
163323112Sdim    std::pair<unsigned, unsigned> indentAndNestingLevel() const {
164323112Sdim      return std::make_pair(Tok->IndentLevel, Tok->NestingLevel);
165321369Sdim    }
166251609Sdim  };
167251609Sdim
168296417Sdimprivate:
169341825Sdim  /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
170261991Sdim  /// or token parts in a line and \c PreviousEndOfTokenColumn and
171261991Sdim  /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
172261991Sdim  void calculateLineBreakInformation();
173251609Sdim
174353358Sdim  /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
175353358Sdim  void alignConsecutiveMacros();
176353358Sdim
177341825Sdim  /// Align consecutive assignments over all \c Changes.
178288943Sdim  void alignConsecutiveAssignments();
179288943Sdim
180341825Sdim  /// Align consecutive declarations over all \c Changes.
181296417Sdim  void alignConsecutiveDeclarations();
182288943Sdim
183341825Sdim  /// Align trailing comments over all \c Changes.
184261991Sdim  void alignTrailingComments();
185251609Sdim
186341825Sdim  /// Align trailing comments from change \p Start to change \p End at
187261991Sdim  /// the specified \p Column.
188261991Sdim  void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
189261991Sdim
190341825Sdim  /// Align escaped newlines over all \c Changes.
191261991Sdim  void alignEscapedNewlines();
192261991Sdim
193341825Sdim  /// Align escaped newlines from change \p Start to change \p End at
194261991Sdim  /// the specified \p Column.
195261991Sdim  void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
196261991Sdim
197341825Sdim  /// Fill \c Replaces with the replacements for all effective changes.
198261991Sdim  void generateChanges();
199261991Sdim
200341825Sdim  /// Stores \p Text as the replacement for the whitespace in \p Range.
201296417Sdim  void storeReplacement(SourceRange Range, StringRef Text);
202261991Sdim  void appendNewlineText(std::string &Text, unsigned Newlines);
203327952Sdim  void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
204327952Sdim                                unsigned PreviousEndOfTokenColumn,
205327952Sdim                                unsigned EscapedNewlineColumn);
206261991Sdim  void appendIndentText(std::string &Text, unsigned IndentLevel,
207261991Sdim                        unsigned Spaces, unsigned WhitespaceStartColumn);
208261991Sdim
209261991Sdim  SmallVector<Change, 16> Changes;
210309124Sdim  const SourceManager &SourceMgr;
211251609Sdim  tooling::Replacements Replaces;
212251609Sdim  const FormatStyle &Style;
213261991Sdim  bool UseCRLF;
214251609Sdim};
215251609Sdim
216251609Sdim} // namespace format
217251609Sdim} // namespace clang
218251609Sdim
219280031Sdim#endif
220