1251609Sdim//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2251609Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6251609Sdim// 7251609Sdim//===----------------------------------------------------------------------===// 8251609Sdim/// 9251609Sdim/// \file 10341825Sdim/// WhitespaceManager class manages whitespace around tokens and their 11251609Sdim/// replacements. 12251609Sdim/// 13251609Sdim//===----------------------------------------------------------------------===// 14251609Sdim 15280031Sdim#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 16280031Sdim#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17251609Sdim 18251609Sdim#include "TokenAnnotator.h" 19251609Sdim#include "clang/Basic/SourceManager.h" 20251609Sdim#include "clang/Format/Format.h" 21251609Sdim#include <string> 22251609Sdim 23251609Sdimnamespace clang { 24251609Sdimnamespace format { 25251609Sdim 26341825Sdim/// Manages the whitespaces around tokens and their replacements. 27251609Sdim/// 28251609Sdim/// This includes special handling for certain constructs, e.g. the alignment of 29251609Sdim/// trailing line comments. 30261991Sdim/// 31261991Sdim/// To guarantee correctness of alignment operations, the \c WhitespaceManager 32261991Sdim/// must be informed about every token in the source file; for each token, there 33261991Sdim/// must be exactly one call to either \c replaceWhitespace or 34261991Sdim/// \c addUntouchableToken. 35261991Sdim/// 36261991Sdim/// There may be multiple calls to \c breakToken for a given token. 37251609Sdimclass WhitespaceManager { 38251609Sdimpublic: 39309124Sdim WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 40261991Sdim bool UseCRLF) 41261991Sdim : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 42251609Sdim 43353358Sdim bool useCRLF() const { return UseCRLF; } 44353358Sdim 45341825Sdim /// Replaces the whitespace in front of \p Tok. Only call once for 46251609Sdim /// each \c AnnotatedToken. 47321369Sdim /// 48321369Sdim /// \p StartOfTokenColumn is the column at which the token will start after 49321369Sdim /// this replacement. It is needed for determining how \p Spaces is turned 50321369Sdim /// into tabs and spaces for some format styles. 51321369Sdim void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, 52261991Sdim unsigned StartOfTokenColumn, 53261991Sdim bool InPPDirective = false); 54251609Sdim 55341825Sdim /// Adds information about an unchangeable token's whitespace. 56251609Sdim /// 57261991Sdim /// Needs to be called for every token for which \c replaceWhitespace 58261991Sdim /// was not called. 59261991Sdim void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 60251609Sdim 61327952Sdim llvm::Error addReplacement(const tooling::Replacement &Replacement); 62327952Sdim 63341825Sdim /// Inserts or replaces whitespace in the middle of a token. 64251609Sdim /// 65261991Sdim /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 66261991Sdim /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 67261991Sdim /// characters. 68251609Sdim /// 69276479Sdim /// Note: \p Spaces can be negative to retain information about initial 70276479Sdim /// relative column offset between a line of a block comment and the start of 71276479Sdim /// the comment. This negative offset may be compensated by trailing comment 72276479Sdim /// alignment here. In all other cases negative \p Spaces will be truncated to 73276479Sdim /// 0. 74276479Sdim /// 75261991Sdim /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 76261991Sdim /// used to align backslashes correctly. 77261991Sdim void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 78261991Sdim unsigned ReplaceChars, 79261991Sdim StringRef PreviousPostfix, 80261991Sdim StringRef CurrentPrefix, bool InPPDirective, 81321369Sdim unsigned Newlines, int Spaces); 82251609Sdim 83341825Sdim /// Returns all the \c Replacements created during formatting. 84251609Sdim const tooling::Replacements &generateReplacements(); 85251609Sdim 86341825Sdim /// Represents a change before a token, a break inside a token, 87261991Sdim /// or the layout of an unchanged token (or whitespace within). 88261991Sdim struct Change { 89341825Sdim /// Functor to sort changes in original source order. 90261991Sdim class IsBeforeInFile { 91261991Sdim public: 92261991Sdim IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 93261991Sdim bool operator()(const Change &C1, const Change &C2) const; 94251609Sdim 95261991Sdim private: 96261991Sdim const SourceManager &SourceMgr; 97261991Sdim }; 98251609Sdim 99341825Sdim /// Creates a \c Change. 100261991Sdim /// 101261991Sdim /// The generated \c Change will replace the characters at 102261991Sdim /// \p OriginalWhitespaceRange with a concatenation of 103261991Sdim /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 104261991Sdim /// and \p CurrentLinePrefix. 105261991Sdim /// 106261991Sdim /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 107261991Sdim /// trailing comments and escaped newlines. 108321369Sdim Change(const FormatToken &Tok, bool CreateReplacement, 109321369Sdim SourceRange OriginalWhitespaceRange, int Spaces, 110321369Sdim unsigned StartOfTokenColumn, unsigned NewlinesBefore, 111321369Sdim StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, 112321369Sdim bool ContinuesPPDirective, bool IsInsideToken); 113251609Sdim 114321369Sdim // The kind of the token whose whitespace this change replaces, or in which 115321369Sdim // this change inserts whitespace. 116321369Sdim // FIXME: Currently this is not set correctly for breaks inside comments, as 117321369Sdim // the \c BreakableToken is still doing its own alignment. 118321369Sdim const FormatToken *Tok; 119321369Sdim 120261991Sdim bool CreateReplacement; 121261991Sdim // Changes might be in the middle of a token, so we cannot just keep the 122261991Sdim // FormatToken around to query its information. 123261991Sdim SourceRange OriginalWhitespaceRange; 124261991Sdim unsigned StartOfTokenColumn; 125261991Sdim unsigned NewlinesBefore; 126261991Sdim std::string PreviousLinePostfix; 127261991Sdim std::string CurrentLinePrefix; 128261991Sdim bool ContinuesPPDirective; 129251609Sdim 130261991Sdim // The number of spaces in front of the token or broken part of the token. 131261991Sdim // This will be adapted when aligning tokens. 132276479Sdim // Can be negative to retain information about the initial relative offset 133276479Sdim // of the lines in a block comment. This is used when aligning trailing 134276479Sdim // comments. Uncompensated negative offset is truncated to 0. 135276479Sdim int Spaces; 136261991Sdim 137296417Sdim // If this change is inside of a token but not at the start of the token or 138296417Sdim // directly after a newline. 139296417Sdim bool IsInsideToken; 140296417Sdim 141261991Sdim // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 142261991Sdim // \c EscapedNewlineColumn will be calculated in 143261991Sdim // \c calculateLineBreakInformation. 144261991Sdim bool IsTrailingComment; 145261991Sdim unsigned TokenLength; 146261991Sdim unsigned PreviousEndOfTokenColumn; 147261991Sdim unsigned EscapedNewlineColumn; 148276479Sdim 149276479Sdim // These fields are used to retain correct relative line indentation in a 150276479Sdim // block comment when aligning trailing comments. 151276479Sdim // 152276479Sdim // If this Change represents a continuation of a block comment, 153276479Sdim // \c StartOfBlockComment is pointer to the first Change in the block 154276479Sdim // comment. \c IndentationOffset is a relative column offset to this 155276479Sdim // change, so that the correct column can be reconstructed at the end of 156276479Sdim // the alignment process. 157276479Sdim const Change *StartOfBlockComment; 158276479Sdim int IndentationOffset; 159321369Sdim 160323112Sdim // A combination of indent level and nesting level, which are used in 161321369Sdim // tandem to compute lexical scope, for the purposes of deciding 162321369Sdim // when to stop consecutive alignment runs. 163323112Sdim std::pair<unsigned, unsigned> indentAndNestingLevel() const { 164323112Sdim return std::make_pair(Tok->IndentLevel, Tok->NestingLevel); 165321369Sdim } 166251609Sdim }; 167251609Sdim 168296417Sdimprivate: 169341825Sdim /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens 170261991Sdim /// or token parts in a line and \c PreviousEndOfTokenColumn and 171261991Sdim /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 172261991Sdim void calculateLineBreakInformation(); 173251609Sdim 174353358Sdim /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes. 175353358Sdim void alignConsecutiveMacros(); 176353358Sdim 177341825Sdim /// Align consecutive assignments over all \c Changes. 178288943Sdim void alignConsecutiveAssignments(); 179288943Sdim 180341825Sdim /// Align consecutive declarations over all \c Changes. 181296417Sdim void alignConsecutiveDeclarations(); 182288943Sdim 183341825Sdim /// Align trailing comments over all \c Changes. 184261991Sdim void alignTrailingComments(); 185251609Sdim 186341825Sdim /// Align trailing comments from change \p Start to change \p End at 187261991Sdim /// the specified \p Column. 188261991Sdim void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 189261991Sdim 190341825Sdim /// Align escaped newlines over all \c Changes. 191261991Sdim void alignEscapedNewlines(); 192261991Sdim 193341825Sdim /// Align escaped newlines from change \p Start to change \p End at 194261991Sdim /// the specified \p Column. 195261991Sdim void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 196261991Sdim 197341825Sdim /// Fill \c Replaces with the replacements for all effective changes. 198261991Sdim void generateChanges(); 199261991Sdim 200341825Sdim /// Stores \p Text as the replacement for the whitespace in \p Range. 201296417Sdim void storeReplacement(SourceRange Range, StringRef Text); 202261991Sdim void appendNewlineText(std::string &Text, unsigned Newlines); 203327952Sdim void appendEscapedNewlineText(std::string &Text, unsigned Newlines, 204327952Sdim unsigned PreviousEndOfTokenColumn, 205327952Sdim unsigned EscapedNewlineColumn); 206261991Sdim void appendIndentText(std::string &Text, unsigned IndentLevel, 207261991Sdim unsigned Spaces, unsigned WhitespaceStartColumn); 208261991Sdim 209261991Sdim SmallVector<Change, 16> Changes; 210309124Sdim const SourceManager &SourceMgr; 211251609Sdim tooling::Replacements Replaces; 212251609Sdim const FormatStyle &Style; 213261991Sdim bool UseCRLF; 214251609Sdim}; 215251609Sdim 216251609Sdim} // namespace format 217251609Sdim} // namespace clang 218251609Sdim 219280031Sdim#endif 220