1259701Sdim//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2259701Sdim// 3259701Sdim// The LLVM Compiler Infrastructure 4259701Sdim// 5259701Sdim// This file is distributed under the University of Illinois Open Source 6259701Sdim// License. See LICENSE.TXT for details. 7259701Sdim// 8259701Sdim//===----------------------------------------------------------------------===// 9259701Sdim/// 10259701Sdim/// \file 11259701Sdim/// \brief This file implements an indenter that manages the indentation of 12259701Sdim/// continuations. 13259701Sdim/// 14259701Sdim//===----------------------------------------------------------------------===// 15259701Sdim 16259701Sdim#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 17259701Sdim#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 18259701Sdim 19259701Sdim#include "Encoding.h" 20259701Sdim#include "clang/Format/Format.h" 21259701Sdim 22259701Sdimnamespace clang { 23259701Sdimclass SourceManager; 24259701Sdim 25259701Sdimnamespace format { 26259701Sdim 27259701Sdimclass AnnotatedLine; 28259701Sdimstruct FormatToken; 29259701Sdimstruct LineState; 30259701Sdimstruct ParenState; 31259701Sdimclass WhitespaceManager; 32259701Sdim 33259701Sdimclass ContinuationIndenter { 34259701Sdimpublic: 35259701Sdim /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 36259701Sdim /// column \p FirstIndent. 37259701Sdim ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr, 38259701Sdim WhitespaceManager &Whitespaces, 39259701Sdim encoding::Encoding Encoding, 40259701Sdim bool BinPackInconclusiveFunctions); 41259701Sdim 42259701Sdim /// \brief Get the initial state, i.e. the state after placing \p Line's 43259701Sdim /// first token at \p FirstIndent. 44259701Sdim LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 45259701Sdim bool DryRun); 46259701Sdim 47259701Sdim // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 48259701Sdim // better home. 49259701Sdim /// \brief Returns \c true, if a line break after \p State is allowed. 50259701Sdim bool canBreak(const LineState &State); 51259701Sdim 52259701Sdim /// \brief Returns \c true, if a line break after \p State is mandatory. 53259701Sdim bool mustBreak(const LineState &State); 54259701Sdim 55259701Sdim /// \brief Appends the next token to \p State and updates information 56259701Sdim /// necessary for indentation. 57259701Sdim /// 58259701Sdim /// Puts the token on the current line if \p Newline is \c false and adds a 59259701Sdim /// line break and necessary indentation otherwise. 60259701Sdim /// 61259701Sdim /// If \p DryRun is \c false, also creates and stores the required 62259701Sdim /// \c Replacement. 63259701Sdim unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 64259701Sdim unsigned ExtraSpaces = 0); 65259701Sdim 66259701Sdim /// \brief Get the column limit for this line. This is the style's column 67259701Sdim /// limit, potentially reduced for preprocessor definitions. 68259701Sdim unsigned getColumnLimit(const LineState &State) const; 69259701Sdim 70259701Sdimprivate: 71259701Sdim /// \brief Mark the next token as consumed in \p State and modify its stacks 72259701Sdim /// accordingly. 73259701Sdim unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 74259701Sdim 75259701Sdim /// \brief If the current token sticks out over the end of the line, break 76259701Sdim /// it if possible. 77259701Sdim /// 78259701Sdim /// \returns An extra penalty if a token was broken, otherwise 0. 79259701Sdim /// 80259701Sdim /// The returned penalty will cover the cost of the additional line breaks and 81259701Sdim /// column limit violation in all lines except for the last one. The penalty 82259701Sdim /// for the column limit violation in the last line (and in single line 83259701Sdim /// tokens) is handled in \c addNextStateToQueue. 84259701Sdim unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 85259701Sdim bool DryRun); 86259701Sdim 87259701Sdim /// \brief Appends the next token to \p State and updates information 88259701Sdim /// necessary for indentation. 89259701Sdim /// 90259701Sdim /// Puts the token on the current line. 91259701Sdim /// 92259701Sdim /// If \p DryRun is \c false, also creates and stores the required 93259701Sdim /// \c Replacement. 94259701Sdim void addTokenOnCurrentLine(LineState &State, bool DryRun, 95259701Sdim unsigned ExtraSpaces); 96259701Sdim 97259701Sdim /// \brief Appends the next token to \p State and updates information 98259701Sdim /// necessary for indentation. 99259701Sdim /// 100259701Sdim /// Adds a line break and necessary indentation. 101259701Sdim /// 102259701Sdim /// If \p DryRun is \c false, also creates and stores the required 103259701Sdim /// \c Replacement. 104259701Sdim unsigned addTokenOnNewLine(LineState &State, bool DryRun); 105259701Sdim 106259701Sdim /// \brief Adds a multiline token to the \p State. 107259701Sdim /// 108259701Sdim /// \returns Extra penalty for the first line of the literal: last line is 109259701Sdim /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 110259701Sdim /// matter, as we don't change them. 111259701Sdim unsigned addMultilineToken(const FormatToken &Current, LineState &State); 112259701Sdim 113259701Sdim /// \brief Returns \c true if the next token starts a multiline string 114259701Sdim /// literal. 115259701Sdim /// 116259701Sdim /// This includes implicitly concatenated strings, strings that will be broken 117259701Sdim /// by clang-format and string literals with escaped newlines. 118259701Sdim bool NextIsMultilineString(const LineState &State); 119259701Sdim 120259701Sdim FormatStyle Style; 121259701Sdim SourceManager &SourceMgr; 122259701Sdim WhitespaceManager &Whitespaces; 123259701Sdim encoding::Encoding Encoding; 124259701Sdim bool BinPackInconclusiveFunctions; 125259701Sdim}; 126259701Sdim 127259701Sdimstruct ParenState { 128259701Sdim ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 129259701Sdim bool AvoidBinPacking, bool NoLineBreak) 130259701Sdim : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 131259701Sdim FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), 132259701Sdim AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 133259701Sdim NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 134259701Sdim StartOfArraySubscripts(0), NestedNameSpecifierContinuation(0), 135259701Sdim CallContinuation(0), VariablePos(0), ContainsLineBreak(false), 136259701Sdim ContainsUnwrappedBuilder(0) {} 137259701Sdim 138259701Sdim /// \brief The position to which a specific parenthesis level needs to be 139259701Sdim /// indented. 140259701Sdim unsigned Indent; 141259701Sdim 142259701Sdim /// \brief The number of indentation levels of the block. 143259701Sdim unsigned IndentLevel; 144259701Sdim 145259701Sdim /// \brief The position of the last space on each level. 146259701Sdim /// 147259701Sdim /// Used e.g. to break like: 148259701Sdim /// functionCall(Parameter, otherCall( 149259701Sdim /// OtherParameter)); 150259701Sdim unsigned LastSpace; 151259701Sdim 152259701Sdim /// \brief The position the first "<<" operator encountered on each level. 153259701Sdim /// 154259701Sdim /// Used to align "<<" operators. 0 if no such operator has been encountered 155259701Sdim /// on a level. 156259701Sdim unsigned FirstLessLess; 157259701Sdim 158259701Sdim /// \brief Whether a newline needs to be inserted before the block's closing 159259701Sdim /// brace. 160259701Sdim /// 161259701Sdim /// We only want to insert a newline before the closing brace if there also 162259701Sdim /// was a newline after the beginning left brace. 163259701Sdim bool BreakBeforeClosingBrace; 164259701Sdim 165259701Sdim /// \brief The column of a \c ? in a conditional expression; 166259701Sdim unsigned QuestionColumn; 167259701Sdim 168259701Sdim /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 169259701Sdim /// lines, in this context. 170259701Sdim bool AvoidBinPacking; 171259701Sdim 172259701Sdim /// \brief Break after the next comma (or all the commas in this context if 173259701Sdim /// \c AvoidBinPacking is \c true). 174259701Sdim bool BreakBeforeParameter; 175259701Sdim 176259701Sdim /// \brief Line breaking in this context would break a formatting rule. 177259701Sdim bool NoLineBreak; 178259701Sdim 179259701Sdim /// \brief The position of the colon in an ObjC method declaration/call. 180259701Sdim unsigned ColonPos; 181259701Sdim 182259701Sdim /// \brief The start of the most recent function in a builder-type call. 183259701Sdim unsigned StartOfFunctionCall; 184259701Sdim 185259701Sdim /// \brief Contains the start of array subscript expressions, so that they 186259701Sdim /// can be aligned. 187259701Sdim unsigned StartOfArraySubscripts; 188259701Sdim 189259701Sdim /// \brief If a nested name specifier was broken over multiple lines, this 190259701Sdim /// contains the start column of the second line. Otherwise 0. 191259701Sdim unsigned NestedNameSpecifierContinuation; 192259701Sdim 193259701Sdim /// \brief If a call expression was broken over multiple lines, this 194259701Sdim /// contains the start column of the second line. Otherwise 0. 195259701Sdim unsigned CallContinuation; 196259701Sdim 197259701Sdim /// \brief The column of the first variable name in a variable declaration. 198259701Sdim /// 199259701Sdim /// Used to align further variables if necessary. 200259701Sdim unsigned VariablePos; 201259701Sdim 202259701Sdim /// \brief \c true if this \c ParenState already contains a line-break. 203259701Sdim /// 204259701Sdim /// The first line break in a certain \c ParenState causes extra penalty so 205259701Sdim /// that clang-format prefers similar breaks, i.e. breaks in the same 206259701Sdim /// parenthesis. 207259701Sdim bool ContainsLineBreak; 208259701Sdim 209259701Sdim /// \brief \c true if this \c ParenState contains multiple segments of a 210259701Sdim /// builder-type call on one line. 211259701Sdim bool ContainsUnwrappedBuilder; 212259701Sdim 213259701Sdim bool operator<(const ParenState &Other) const { 214259701Sdim if (Indent != Other.Indent) 215259701Sdim return Indent < Other.Indent; 216259701Sdim if (LastSpace != Other.LastSpace) 217259701Sdim return LastSpace < Other.LastSpace; 218259701Sdim if (FirstLessLess != Other.FirstLessLess) 219259701Sdim return FirstLessLess < Other.FirstLessLess; 220259701Sdim if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 221259701Sdim return BreakBeforeClosingBrace; 222259701Sdim if (QuestionColumn != Other.QuestionColumn) 223259701Sdim return QuestionColumn < Other.QuestionColumn; 224259701Sdim if (AvoidBinPacking != Other.AvoidBinPacking) 225259701Sdim return AvoidBinPacking; 226259701Sdim if (BreakBeforeParameter != Other.BreakBeforeParameter) 227259701Sdim return BreakBeforeParameter; 228259701Sdim if (NoLineBreak != Other.NoLineBreak) 229259701Sdim return NoLineBreak; 230259701Sdim if (ColonPos != Other.ColonPos) 231259701Sdim return ColonPos < Other.ColonPos; 232259701Sdim if (StartOfFunctionCall != Other.StartOfFunctionCall) 233259701Sdim return StartOfFunctionCall < Other.StartOfFunctionCall; 234259701Sdim if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 235259701Sdim return StartOfArraySubscripts < Other.StartOfArraySubscripts; 236259701Sdim if (CallContinuation != Other.CallContinuation) 237259701Sdim return CallContinuation < Other.CallContinuation; 238259701Sdim if (VariablePos != Other.VariablePos) 239259701Sdim return VariablePos < Other.VariablePos; 240259701Sdim if (ContainsLineBreak != Other.ContainsLineBreak) 241259701Sdim return ContainsLineBreak < Other.ContainsLineBreak; 242259701Sdim if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 243259701Sdim return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; 244259701Sdim return false; 245259701Sdim } 246259701Sdim}; 247259701Sdim 248259701Sdim/// \brief The current state when indenting a unwrapped line. 249259701Sdim/// 250259701Sdim/// As the indenting tries different combinations this is copied by value. 251259701Sdimstruct LineState { 252259701Sdim /// \brief The number of used columns in the current line. 253259701Sdim unsigned Column; 254259701Sdim 255259701Sdim /// \brief The token that needs to be next formatted. 256259701Sdim FormatToken *NextToken; 257259701Sdim 258259701Sdim /// \brief \c true if this line contains a continued for-loop section. 259259701Sdim bool LineContainsContinuedForLoopSection; 260259701Sdim 261259701Sdim /// \brief The level of nesting inside (), [], <> and {}. 262259701Sdim unsigned ParenLevel; 263259701Sdim 264259701Sdim /// \brief The \c ParenLevel at the start of this line. 265259701Sdim unsigned StartOfLineLevel; 266259701Sdim 267259701Sdim /// \brief The lowest \c ParenLevel on the current line. 268259701Sdim unsigned LowestLevelOnLine; 269259701Sdim 270259701Sdim /// \brief The start column of the string literal, if we're in a string 271259701Sdim /// literal sequence, 0 otherwise. 272259701Sdim unsigned StartOfStringLiteral; 273259701Sdim 274259701Sdim /// \brief A stack keeping track of properties applying to parenthesis 275259701Sdim /// levels. 276259701Sdim std::vector<ParenState> Stack; 277259701Sdim 278259701Sdim /// \brief Ignore the stack of \c ParenStates for state comparison. 279259701Sdim /// 280259701Sdim /// In long and deeply nested unwrapped lines, the current algorithm can 281259701Sdim /// be insufficient for finding the best formatting with a reasonable amount 282259701Sdim /// of time and memory. Setting this flag will effectively lead to the 283259701Sdim /// algorithm not analyzing some combinations. However, these combinations 284259701Sdim /// rarely contain the optimal solution: In short, accepting a higher 285259701Sdim /// penalty early would need to lead to different values in the \c 286259701Sdim /// ParenState stack (in an otherwise identical state) and these different 287259701Sdim /// values would need to lead to a significant amount of avoided penalty 288259701Sdim /// later. 289259701Sdim /// 290259701Sdim /// FIXME: Come up with a better algorithm instead. 291259701Sdim bool IgnoreStackForComparison; 292259701Sdim 293259701Sdim /// \brief The indent of the first token. 294259701Sdim unsigned FirstIndent; 295259701Sdim 296259701Sdim /// \brief The line that is being formatted. 297259701Sdim /// 298259701Sdim /// Does not need to be considered for memoization because it doesn't change. 299259701Sdim const AnnotatedLine *Line; 300259701Sdim 301259701Sdim /// \brief Comparison operator to be able to used \c LineState in \c map. 302259701Sdim bool operator<(const LineState &Other) const { 303259701Sdim if (NextToken != Other.NextToken) 304259701Sdim return NextToken < Other.NextToken; 305259701Sdim if (Column != Other.Column) 306259701Sdim return Column < Other.Column; 307259701Sdim if (LineContainsContinuedForLoopSection != 308259701Sdim Other.LineContainsContinuedForLoopSection) 309259701Sdim return LineContainsContinuedForLoopSection; 310259701Sdim if (ParenLevel != Other.ParenLevel) 311259701Sdim return ParenLevel < Other.ParenLevel; 312259701Sdim if (StartOfLineLevel != Other.StartOfLineLevel) 313259701Sdim return StartOfLineLevel < Other.StartOfLineLevel; 314259701Sdim if (LowestLevelOnLine != Other.LowestLevelOnLine) 315259701Sdim return LowestLevelOnLine < Other.LowestLevelOnLine; 316259701Sdim if (StartOfStringLiteral != Other.StartOfStringLiteral) 317259701Sdim return StartOfStringLiteral < Other.StartOfStringLiteral; 318259701Sdim if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 319259701Sdim return false; 320259701Sdim return Stack < Other.Stack; 321259701Sdim } 322259701Sdim}; 323259701Sdim 324259701Sdim} // end namespace format 325259701Sdim} // end namespace clang 326259701Sdim 327259701Sdim#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H 328