1259701Sdim//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2259701Sdim//
3259701Sdim//                     The LLVM Compiler Infrastructure
4259701Sdim//
5259701Sdim// This file is distributed under the University of Illinois Open Source
6259701Sdim// License. See LICENSE.TXT for details.
7259701Sdim//
8259701Sdim//===----------------------------------------------------------------------===//
9259701Sdim///
10259701Sdim/// \file
11259701Sdim/// \brief This file implements an indenter that manages the indentation of
12259701Sdim/// continuations.
13259701Sdim///
14259701Sdim//===----------------------------------------------------------------------===//
15259701Sdim
16259701Sdim#ifndef LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
17259701Sdim#define LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
18259701Sdim
19259701Sdim#include "Encoding.h"
20259701Sdim#include "clang/Format/Format.h"
21259701Sdim
22259701Sdimnamespace clang {
23259701Sdimclass SourceManager;
24259701Sdim
25259701Sdimnamespace format {
26259701Sdim
27259701Sdimclass AnnotatedLine;
28259701Sdimstruct FormatToken;
29259701Sdimstruct LineState;
30259701Sdimstruct ParenState;
31259701Sdimclass WhitespaceManager;
32259701Sdim
33259701Sdimclass ContinuationIndenter {
34259701Sdimpublic:
35259701Sdim  /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
36259701Sdim  /// column \p FirstIndent.
37259701Sdim  ContinuationIndenter(const FormatStyle &Style, SourceManager &SourceMgr,
38259701Sdim                       WhitespaceManager &Whitespaces,
39259701Sdim                       encoding::Encoding Encoding,
40259701Sdim                       bool BinPackInconclusiveFunctions);
41259701Sdim
42259701Sdim  /// \brief Get the initial state, i.e. the state after placing \p Line's
43259701Sdim  /// first token at \p FirstIndent.
44259701Sdim  LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
45259701Sdim                            bool DryRun);
46259701Sdim
47259701Sdim  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
48259701Sdim  // better home.
49259701Sdim  /// \brief Returns \c true, if a line break after \p State is allowed.
50259701Sdim  bool canBreak(const LineState &State);
51259701Sdim
52259701Sdim  /// \brief Returns \c true, if a line break after \p State is mandatory.
53259701Sdim  bool mustBreak(const LineState &State);
54259701Sdim
55259701Sdim  /// \brief Appends the next token to \p State and updates information
56259701Sdim  /// necessary for indentation.
57259701Sdim  ///
58259701Sdim  /// Puts the token on the current line if \p Newline is \c false and adds a
59259701Sdim  /// line break and necessary indentation otherwise.
60259701Sdim  ///
61259701Sdim  /// If \p DryRun is \c false, also creates and stores the required
62259701Sdim  /// \c Replacement.
63259701Sdim  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
64259701Sdim                           unsigned ExtraSpaces = 0);
65259701Sdim
66259701Sdim  /// \brief Get the column limit for this line. This is the style's column
67259701Sdim  /// limit, potentially reduced for preprocessor definitions.
68259701Sdim  unsigned getColumnLimit(const LineState &State) const;
69259701Sdim
70259701Sdimprivate:
71259701Sdim  /// \brief Mark the next token as consumed in \p State and modify its stacks
72259701Sdim  /// accordingly.
73259701Sdim  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
74259701Sdim
75259701Sdim  /// \brief If the current token sticks out over the end of the line, break
76259701Sdim  /// it if possible.
77259701Sdim  ///
78259701Sdim  /// \returns An extra penalty if a token was broken, otherwise 0.
79259701Sdim  ///
80259701Sdim  /// The returned penalty will cover the cost of the additional line breaks and
81259701Sdim  /// column limit violation in all lines except for the last one. The penalty
82259701Sdim  /// for the column limit violation in the last line (and in single line
83259701Sdim  /// tokens) is handled in \c addNextStateToQueue.
84259701Sdim  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
85259701Sdim                                bool DryRun);
86259701Sdim
87259701Sdim  /// \brief Appends the next token to \p State and updates information
88259701Sdim  /// necessary for indentation.
89259701Sdim  ///
90259701Sdim  /// Puts the token on the current line.
91259701Sdim  ///
92259701Sdim  /// If \p DryRun is \c false, also creates and stores the required
93259701Sdim  /// \c Replacement.
94259701Sdim  void addTokenOnCurrentLine(LineState &State, bool DryRun,
95259701Sdim                             unsigned ExtraSpaces);
96259701Sdim
97259701Sdim  /// \brief Appends the next token to \p State and updates information
98259701Sdim  /// necessary for indentation.
99259701Sdim  ///
100259701Sdim  /// Adds a line break and necessary indentation.
101259701Sdim  ///
102259701Sdim  /// If \p DryRun is \c false, also creates and stores the required
103259701Sdim  /// \c Replacement.
104259701Sdim  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
105259701Sdim
106259701Sdim  /// \brief Adds a multiline token to the \p State.
107259701Sdim  ///
108259701Sdim  /// \returns Extra penalty for the first line of the literal: last line is
109259701Sdim  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
110259701Sdim  /// matter, as we don't change them.
111259701Sdim  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
112259701Sdim
113259701Sdim  /// \brief Returns \c true if the next token starts a multiline string
114259701Sdim  /// literal.
115259701Sdim  ///
116259701Sdim  /// This includes implicitly concatenated strings, strings that will be broken
117259701Sdim  /// by clang-format and string literals with escaped newlines.
118259701Sdim  bool NextIsMultilineString(const LineState &State);
119259701Sdim
120259701Sdim  FormatStyle Style;
121259701Sdim  SourceManager &SourceMgr;
122259701Sdim  WhitespaceManager &Whitespaces;
123259701Sdim  encoding::Encoding Encoding;
124259701Sdim  bool BinPackInconclusiveFunctions;
125259701Sdim};
126259701Sdim
/// \brief Bookkeeping for a single nesting level while a line is laid out.
struct ParenState {
  /// \brief Sets the five caller-provided fields; every other column/position
  /// starts at 0 and every other flag starts as \c false.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent),
        IndentLevel(IndentLevel),
        LastSpace(LastSpace),
        FirstLessLess(0),
        BreakBeforeClosingBrace(false),
        QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking),
        BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak),
        ColonPos(0),
        StartOfFunctionCall(0),
        StartOfArraySubscripts(0),
        NestedNameSpecifierContinuation(0),
        CallContinuation(0),
        VariablePos(0),
        ContainsLineBreak(false),
        ContainsUnwrappedBuilder(false) {}

  /// \brief Column to which this parenthesis level has to be indented.
  unsigned Indent;

  /// \brief Number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief Position of the last space on this level.
  ///
  /// Needed, for instance, to produce breaks such as:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief Position of the first "<<" operator seen on this level, used for
  /// aligning subsequent "<<" operators. 0 means none has been seen yet.
  unsigned FirstLessLess;

  /// \brief Whether the block's closing brace must be preceded by a newline.
  ///
  /// A newline before the closing brace is only wanted when there was one
  /// after the opening left brace as well.
  bool BreakBeforeClosingBrace;

  /// \brief Column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Do not bin pack in this context, i.e. do not put several
  /// parameters/elements on each of several lines.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or after every comma in this context
  /// when \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief A line break in this context would violate a formatting rule.
  bool NoLineBreak;

  /// \brief Position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief Start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Start of array subscript expressions, kept so they can be
  /// aligned.
  unsigned StartOfArraySubscripts;

  /// \brief Start column of the second line of a nested name specifier that
  /// was broken over multiple lines; 0 if it was not broken.
  unsigned NestedNameSpecifierContinuation;

  /// \brief Start column of the second line of a call expression that was
  /// broken over multiple lines; 0 if it was not broken.
  unsigned CallContinuation;

  /// \brief Column of the first variable name in a variable declaration, used
  /// to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true once this \c ParenState contains a line break.
  ///
  /// The first break inside a \c ParenState carries an extra penalty, which
  /// makes clang-format favour similar breaks, i.e. breaks within the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState holds several segments of a
  /// builder-type call on a single line.
  bool ContainsUnwrappedBuilder;

  /// \brief Orders two states by the first field, in the sequence below, on
  /// which they differ.
  ///
  /// Columns and positions order the smaller value first. The flags
  /// \c BreakBeforeClosingBrace, \c AvoidBinPacking, \c BreakBeforeParameter
  /// and \c NoLineBreak order \c true first, whereas \c ContainsLineBreak and
  /// \c ContainsUnwrappedBuilder order \c false first.
  ///
  /// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation take
  /// no part in the comparison, so states differing only in those fields are
  /// treated as equivalent - confirm that this is intended.
  bool operator<(const ParenState &Other) const {
    return Indent != Other.Indent
               ? Indent < Other.Indent
           : LastSpace != Other.LastSpace
               ? LastSpace < Other.LastSpace
           : FirstLessLess != Other.FirstLessLess
               ? FirstLessLess < Other.FirstLessLess
           : BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace
               ? BreakBeforeClosingBrace
           : QuestionColumn != Other.QuestionColumn
               ? QuestionColumn < Other.QuestionColumn
           : AvoidBinPacking != Other.AvoidBinPacking
               ? AvoidBinPacking
           : BreakBeforeParameter != Other.BreakBeforeParameter
               ? BreakBeforeParameter
           : NoLineBreak != Other.NoLineBreak
               ? NoLineBreak
           : ColonPos != Other.ColonPos
               ? ColonPos < Other.ColonPos
           : StartOfFunctionCall != Other.StartOfFunctionCall
               ? StartOfFunctionCall < Other.StartOfFunctionCall
           : StartOfArraySubscripts != Other.StartOfArraySubscripts
               ? StartOfArraySubscripts < Other.StartOfArraySubscripts
           : CallContinuation != Other.CallContinuation
               ? CallContinuation < Other.CallContinuation
           : VariablePos != Other.VariablePos
               ? VariablePos < Other.VariablePos
           // For the two flags below the state with the flag unset comes
           // first, so "differs" means "less" exactly when Other has it set.
           : ContainsLineBreak != Other.ContainsLineBreak
               ? Other.ContainsLineBreak
           : ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder
               ? Other.ContainsUnwrappedBuilder
               : false;
  }
};
247259701Sdim
248259701Sdim/// \brief The current state when indenting a unwrapped line.
249259701Sdim///
250259701Sdim/// As the indenting tries different combinations this is copied by value.
251259701Sdimstruct LineState {
252259701Sdim  /// \brief The number of used columns in the current line.
253259701Sdim  unsigned Column;
254259701Sdim
255259701Sdim  /// \brief The token that needs to be next formatted.
256259701Sdim  FormatToken *NextToken;
257259701Sdim
258259701Sdim  /// \brief \c true if this line contains a continued for-loop section.
259259701Sdim  bool LineContainsContinuedForLoopSection;
260259701Sdim
261259701Sdim  /// \brief The level of nesting inside (), [], <> and {}.
262259701Sdim  unsigned ParenLevel;
263259701Sdim
264259701Sdim  /// \brief The \c ParenLevel at the start of this line.
265259701Sdim  unsigned StartOfLineLevel;
266259701Sdim
267259701Sdim  /// \brief The lowest \c ParenLevel on the current line.
268259701Sdim  unsigned LowestLevelOnLine;
269259701Sdim
270259701Sdim  /// \brief The start column of the string literal, if we're in a string
271259701Sdim  /// literal sequence, 0 otherwise.
272259701Sdim  unsigned StartOfStringLiteral;
273259701Sdim
274259701Sdim  /// \brief A stack keeping track of properties applying to parenthesis
275259701Sdim  /// levels.
276259701Sdim  std::vector<ParenState> Stack;
277259701Sdim
278259701Sdim  /// \brief Ignore the stack of \c ParenStates for state comparison.
279259701Sdim  ///
280259701Sdim  /// In long and deeply nested unwrapped lines, the current algorithm can
281259701Sdim  /// be insufficient for finding the best formatting with a reasonable amount
282259701Sdim  /// of time and memory. Setting this flag will effectively lead to the
283259701Sdim  /// algorithm not analyzing some combinations. However, these combinations
284259701Sdim  /// rarely contain the optimal solution: In short, accepting a higher
285259701Sdim  /// penalty early would need to lead to different values in the \c
286259701Sdim  /// ParenState stack (in an otherwise identical state) and these different
287259701Sdim  /// values would need to lead to a significant amount of avoided penalty
288259701Sdim  /// later.
289259701Sdim  ///
290259701Sdim  /// FIXME: Come up with a better algorithm instead.
291259701Sdim  bool IgnoreStackForComparison;
292259701Sdim
293259701Sdim  /// \brief The indent of the first token.
294259701Sdim  unsigned FirstIndent;
295259701Sdim
296259701Sdim  /// \brief The line that is being formatted.
297259701Sdim  ///
298259701Sdim  /// Does not need to be considered for memoization because it doesn't change.
299259701Sdim  const AnnotatedLine *Line;
300259701Sdim
301259701Sdim  /// \brief Comparison operator to be able to used \c LineState in \c map.
302259701Sdim  bool operator<(const LineState &Other) const {
303259701Sdim    if (NextToken != Other.NextToken)
304259701Sdim      return NextToken < Other.NextToken;
305259701Sdim    if (Column != Other.Column)
306259701Sdim      return Column < Other.Column;
307259701Sdim    if (LineContainsContinuedForLoopSection !=
308259701Sdim        Other.LineContainsContinuedForLoopSection)
309259701Sdim      return LineContainsContinuedForLoopSection;
310259701Sdim    if (ParenLevel != Other.ParenLevel)
311259701Sdim      return ParenLevel < Other.ParenLevel;
312259701Sdim    if (StartOfLineLevel != Other.StartOfLineLevel)
313259701Sdim      return StartOfLineLevel < Other.StartOfLineLevel;
314259701Sdim    if (LowestLevelOnLine != Other.LowestLevelOnLine)
315259701Sdim      return LowestLevelOnLine < Other.LowestLevelOnLine;
316259701Sdim    if (StartOfStringLiteral != Other.StartOfStringLiteral)
317259701Sdim      return StartOfStringLiteral < Other.StartOfStringLiteral;
318259701Sdim    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
319259701Sdim      return false;
320259701Sdim    return Stack < Other.Stack;
321259701Sdim  }
322259701Sdim};
323259701Sdim
324259701Sdim} // end namespace format
325259701Sdim} // end namespace clang
326259701Sdim
327259701Sdim#endif // LLVM_CLANG_FORMAT_CONTINUATION_INDENTER_H
328