1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20#include "clang/Format/Format.h"
21#include "llvm/Support/Regex.h"
22#include <map>
23#include <tuple>
24
25namespace clang {
26class SourceManager;
27
28namespace format {
29
// Forward declarations for types that the declarations in this header name
// before (or without) their full definition being visible.
class AnnotatedLine;
class BreakableToken;
class WhitespaceManager;

struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
37
38struct RawStringFormatStyleManager {
39  llvm::StringMap<FormatStyle> DelimiterStyle;
40  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
41
42  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
43
44  llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
45
46  llvm::Optional<FormatStyle>
47  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
48};
49
50class ContinuationIndenter {
51public:
52  /// Constructs a \c ContinuationIndenter to format \p Line starting in
53  /// column \p FirstIndent.
54  ContinuationIndenter(const FormatStyle &Style,
55                       const AdditionalKeywords &Keywords,
56                       const SourceManager &SourceMgr,
57                       WhitespaceManager &Whitespaces,
58                       encoding::Encoding Encoding,
59                       bool BinPackInconclusiveFunctions);
60
61  /// Get the initial state, i.e. the state after placing \p Line's
62  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
63  /// the case of formatting inside raw string literals, \p FirstStartColumn is
64  /// the column at which the state of the parent formatter is.
65  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
66                            const AnnotatedLine *Line, bool DryRun);
67
68  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
69  // better home.
70  /// Returns \c true, if a line break after \p State is allowed.
71  bool canBreak(const LineState &State);
72
73  /// Returns \c true, if a line break after \p State is mandatory.
74  bool mustBreak(const LineState &State);
75
76  /// Appends the next token to \p State and updates information
77  /// necessary for indentation.
78  ///
79  /// Puts the token on the current line if \p Newline is \c false and adds a
80  /// line break and necessary indentation otherwise.
81  ///
82  /// If \p DryRun is \c false, also creates and stores the required
83  /// \c Replacement.
84  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
85                           unsigned ExtraSpaces = 0);
86
87  /// Get the column limit for this line. This is the style's column
88  /// limit, potentially reduced for preprocessor definitions.
89  unsigned getColumnLimit(const LineState &State) const;
90
91private:
92  /// Mark the next token as consumed in \p State and modify its stacks
93  /// accordingly.
94  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
95
96  /// Update 'State' according to the next token's fake left parentheses.
97  void moveStatePastFakeLParens(LineState &State, bool Newline);
98  /// Update 'State' according to the next token's fake r_parens.
99  void moveStatePastFakeRParens(LineState &State);
100
101  /// Update 'State' according to the next token being one of "(<{[".
102  void moveStatePastScopeOpener(LineState &State, bool Newline);
103  /// Update 'State' according to the next token being one of ")>}]".
104  void moveStatePastScopeCloser(LineState &State);
105  /// Update 'State' with the next token opening a nested block.
106  void moveStateToNewBlock(LineState &State);
107
108  /// Reformats a raw string literal.
109  ///
110  /// \returns An extra penalty induced by reformatting the token.
111  unsigned reformatRawStringLiteral(const FormatToken &Current,
112                                    LineState &State,
113                                    const FormatStyle &RawStringStyle,
114                                    bool DryRun, bool Newline);
115
116  /// If the current token is at the end of the current line, handle
117  /// the transition to the next line.
118  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
119                           bool DryRun, bool AllowBreak, bool Newline);
120
121  /// If \p Current is a raw string that is configured to be reformatted,
122  /// return the style to be used.
123  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
124                                                const LineState &State);
125
126  /// If the current token sticks out over the end of the line, break
127  /// it if possible.
128  ///
129  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
130  /// when tokens are broken or lines exceed the column limit, and exceeded
131  /// indicates whether the algorithm purposefully left lines exceeding the
132  /// column limit.
133  ///
134  /// The returned penalty will cover the cost of the additional line breaks
135  /// and column limit violation in all lines except for the last one. The
136  /// penalty for the column limit violation in the last line (and in single
137  /// line tokens) is handled in \c addNextStateToQueue.
138  ///
139  /// \p Strict indicates whether reflowing is allowed to leave characters
140  /// protruding the column limit; if true, lines will be split strictly within
141  /// the column limit where possible; if false, words are allowed to protrude
142  /// over the column limit as long as the penalty is less than the penalty
143  /// of a break.
144  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
145                                                 LineState &State,
146                                                 bool AllowBreak, bool DryRun,
147                                                 bool Strict);
148
149  /// Returns the \c BreakableToken starting at \p Current, or nullptr
150  /// if the current token cannot be broken.
151  std::unique_ptr<BreakableToken>
152  createBreakableToken(const FormatToken &Current, LineState &State,
153                       bool AllowBreak);
154
155  /// Appends the next token to \p State and updates information
156  /// necessary for indentation.
157  ///
158  /// Puts the token on the current line.
159  ///
160  /// If \p DryRun is \c false, also creates and stores the required
161  /// \c Replacement.
162  void addTokenOnCurrentLine(LineState &State, bool DryRun,
163                             unsigned ExtraSpaces);
164
165  /// Appends the next token to \p State and updates information
166  /// necessary for indentation.
167  ///
168  /// Adds a line break and necessary indentation.
169  ///
170  /// If \p DryRun is \c false, also creates and stores the required
171  /// \c Replacement.
172  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
173
174  /// Calculate the new column for a line wrap before the next token.
175  unsigned getNewLineColumn(const LineState &State);
176
177  /// Adds a multiline token to the \p State.
178  ///
179  /// \returns Extra penalty for the first line of the literal: last line is
180  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
181  /// matter, as we don't change them.
182  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
183
184  /// Returns \c true if the next token starts a multiline string
185  /// literal.
186  ///
187  /// This includes implicitly concatenated strings, strings that will be broken
188  /// by clang-format and string literals with escaped newlines.
189  bool nextIsMultilineString(const LineState &State);
190
191  FormatStyle Style;
192  const AdditionalKeywords &Keywords;
193  const SourceManager &SourceMgr;
194  WhitespaceManager &Whitespaces;
195  encoding::Encoding Encoding;
196  bool BinPackInconclusiveFunctions;
197  llvm::Regex CommentPragmasRegex;
198  const RawStringFormatStyleManager RawStringFormats;
199};
200
/// Properties tracked for a single parenthesis level while a line is being
/// laid out.
struct ParenState {
  /// Creates the state for a level opened by \p Tok.
  ///
  /// \c NestedBlockIndent starts out equal to \p Indent, and
  /// \c LastOperatorWrapped and \c AlignColons start out \c true; every other
  /// flag not passed in starts out \c false.
  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
        LastOperatorWrapped(true), ContainsLineBreak(false),
        ContainsUnwrappedBuilder(false), AlignColons(true),
        ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
        NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}

  /// The token opening this parenthesis level, or nullptr if this level is
  /// opened by fake parenthesis.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  const FormatToken *Tok;

  /// The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess = 0;

  /// The column of a \c ? in a conditional expression.
  unsigned QuestionColumn = 0;

  /// The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos = 0;

  /// The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall = 0;

  /// Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts = 0;

  /// If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation = 0;

  /// If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation = 0;

  /// The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos = 0;

  /// Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace : 1;

  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking : 1;

  /// Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter : 1;

  /// Line breaking in this context would break a formatting rule.
  bool NoLineBreak : 1;

  /// Same as \c NoLineBreak, but is restricted until the end of the
  /// operand (including the next ",").
  bool NoLineBreakInOperand : 1;

  /// True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped : 1;

  /// \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak : 1;

  /// \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder : 1;

  /// \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons : 1;

  /// \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound : 1;

  /// \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks : 1;

  /// The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined : 1;

  /// \c true if the current \c ParenState represents an Objective-C
  /// array literal.
  bool IsInsideObjCArrayLiteral : 1;

  /// Orders two states lexicographically over the fields listed below.
  ///
  /// Column-like fields order ascending. For flags, a state with the flag
  /// set orders before a state with the flag cleared; the flags are negated
  /// in the key to get that polarity from the plain tuple comparison.
  ///
  /// Fields documented as "not considered for memoization" are left out.
  /// NOTE(review): \c NestedNameSpecifierContinuation,
  /// \c NoLineBreakInOperand and \c IsInsideObjCArrayLiteral are not compared
  /// either, although they carry no such remark -- confirm that this is
  /// intentional before relying on it.
  bool operator<(const ParenState &Other) const {
    // The order of the tuple elements is the comparison priority; keep it
    // stable.
    const auto MemoKey = [](const ParenState &S) {
      return std::make_tuple(
          S.Indent, S.LastSpace, S.NestedBlockIndent, S.FirstLessLess,
          !S.BreakBeforeClosingBrace, S.QuestionColumn, !S.AvoidBinPacking,
          !S.BreakBeforeParameter, !S.NoLineBreak, !S.LastOperatorWrapped,
          S.ColonPos, S.StartOfFunctionCall, S.StartOfArraySubscripts,
          S.CallContinuation, S.VariablePos, !S.ContainsLineBreak,
          !S.ContainsUnwrappedBuilder, !S.NestedBlockInlined);
    };
    return MemoKey(*this) < MemoKey(Other);
  }
};
372
373/// The current state when indenting a unwrapped line.
374///
375/// As the indenting tries different combinations this is copied by value.
376struct LineState {
377  /// The number of used columns in the current line.
378  unsigned Column;
379
380  /// The token that needs to be next formatted.
381  FormatToken *NextToken;
382
383  /// \c true if this line contains a continued for-loop section.
384  bool LineContainsContinuedForLoopSection;
385
386  /// \c true if \p NextToken should not continue this line.
387  bool NoContinuation;
388
389  /// The \c NestingLevel at the start of this line.
390  unsigned StartOfLineLevel;
391
392  /// The lowest \c NestingLevel on the current line.
393  unsigned LowestLevelOnLine;
394
395  /// The start column of the string literal, if we're in a string
396  /// literal sequence, 0 otherwise.
397  unsigned StartOfStringLiteral;
398
399  /// A stack keeping track of properties applying to parenthesis
400  /// levels.
401  std::vector<ParenState> Stack;
402
403  /// Ignore the stack of \c ParenStates for state comparison.
404  ///
405  /// In long and deeply nested unwrapped lines, the current algorithm can
406  /// be insufficient for finding the best formatting with a reasonable amount
407  /// of time and memory. Setting this flag will effectively lead to the
408  /// algorithm not analyzing some combinations. However, these combinations
409  /// rarely contain the optimal solution: In short, accepting a higher
410  /// penalty early would need to lead to different values in the \c
411  /// ParenState stack (in an otherwise identical state) and these different
412  /// values would need to lead to a significant amount of avoided penalty
413  /// later.
414  ///
415  /// FIXME: Come up with a better algorithm instead.
416  bool IgnoreStackForComparison;
417
418  /// The indent of the first token.
419  unsigned FirstIndent;
420
421  /// The line that is being formatted.
422  ///
423  /// Does not need to be considered for memoization because it doesn't change.
424  const AnnotatedLine *Line;
425
426  /// Comparison operator to be able to used \c LineState in \c map.
427  bool operator<(const LineState &Other) const {
428    if (NextToken != Other.NextToken)
429      return NextToken < Other.NextToken;
430    if (Column != Other.Column)
431      return Column < Other.Column;
432    if (LineContainsContinuedForLoopSection !=
433        Other.LineContainsContinuedForLoopSection)
434      return LineContainsContinuedForLoopSection;
435    if (NoContinuation != Other.NoContinuation)
436      return NoContinuation;
437    if (StartOfLineLevel != Other.StartOfLineLevel)
438      return StartOfLineLevel < Other.StartOfLineLevel;
439    if (LowestLevelOnLine != Other.LowestLevelOnLine)
440      return LowestLevelOnLine < Other.LowestLevelOnLine;
441    if (StartOfStringLiteral != Other.StartOfStringLiteral)
442      return StartOfStringLiteral < Other.StartOfStringLiteral;
443    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
444      return false;
445    return Stack < Other.Stack;
446  }
447};
448
449} // end namespace format
450} // end namespace clang
451
452#endif
453