1//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements an indenter that manages the indentation of
11/// continuations.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18#include "Encoding.h"
19#include "FormatToken.h"
20#include "clang/Format/Format.h"
21#include "llvm/Support/Regex.h"
22#include <map>
23#include <optional>
24#include <tuple>
25
26namespace clang {
27class SourceManager;
28
29namespace format {
30
31class AnnotatedLine;
32class BreakableToken;
33struct FormatToken;
34struct LineState;
35struct ParenState;
36struct RawStringFormatStyleManager;
37class WhitespaceManager;
38
39struct RawStringFormatStyleManager {
40  llvm::StringMap<FormatStyle> DelimiterStyle;
41  llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42
43  RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44
45  std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46
47  std::optional<FormatStyle>
48  getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
49};
50
51class ContinuationIndenter {
52public:
53  /// Constructs a \c ContinuationIndenter to format \p Line starting in
54  /// column \p FirstIndent.
55  ContinuationIndenter(const FormatStyle &Style,
56                       const AdditionalKeywords &Keywords,
57                       const SourceManager &SourceMgr,
58                       WhitespaceManager &Whitespaces,
59                       encoding::Encoding Encoding,
60                       bool BinPackInconclusiveFunctions);
61
62  /// Get the initial state, i.e. the state after placing \p Line's
63  /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64  /// the case of formatting inside raw string literals, \p FirstStartColumn is
65  /// the column at which the state of the parent formatter is.
66  LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67                            const AnnotatedLine *Line, bool DryRun);
68
69  // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70  // better home.
71  /// Returns \c true, if a line break after \p State is allowed.
72  bool canBreak(const LineState &State);
73
74  /// Returns \c true, if a line break after \p State is mandatory.
75  bool mustBreak(const LineState &State);
76
77  /// Appends the next token to \p State and updates information
78  /// necessary for indentation.
79  ///
80  /// Puts the token on the current line if \p Newline is \c false and adds a
81  /// line break and necessary indentation otherwise.
82  ///
83  /// If \p DryRun is \c false, also creates and stores the required
84  /// \c Replacement.
85  unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86                           unsigned ExtraSpaces = 0);
87
88  /// Get the column limit for this line. This is the style's column
89  /// limit, potentially reduced for preprocessor definitions.
90  unsigned getColumnLimit(const LineState &State) const;
91
92private:
93  /// Mark the next token as consumed in \p State and modify its stacks
94  /// accordingly.
95  unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96
97  /// Update 'State' according to the next token's fake left parentheses.
98  void moveStatePastFakeLParens(LineState &State, bool Newline);
99  /// Update 'State' according to the next token's fake r_parens.
100  void moveStatePastFakeRParens(LineState &State);
101
102  /// Update 'State' according to the next token being one of "(<{[".
103  void moveStatePastScopeOpener(LineState &State, bool Newline);
104  /// Update 'State' according to the next token being one of ")>}]".
105  void moveStatePastScopeCloser(LineState &State);
106  /// Update 'State' with the next token opening a nested block.
107  void moveStateToNewBlock(LineState &State);
108
109  /// Reformats a raw string literal.
110  ///
111  /// \returns An extra penalty induced by reformatting the token.
112  unsigned reformatRawStringLiteral(const FormatToken &Current,
113                                    LineState &State,
114                                    const FormatStyle &RawStringStyle,
115                                    bool DryRun, bool Newline);
116
117  /// If the current token is at the end of the current line, handle
118  /// the transition to the next line.
119  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120                           bool DryRun, bool AllowBreak, bool Newline);
121
122  /// If \p Current is a raw string that is configured to be reformatted,
123  /// return the style to be used.
124  std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125                                               const LineState &State);
126
127  /// If the current token sticks out over the end of the line, break
128  /// it if possible.
129  ///
130  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131  /// when tokens are broken or lines exceed the column limit, and exceeded
132  /// indicates whether the algorithm purposefully left lines exceeding the
133  /// column limit.
134  ///
135  /// The returned penalty will cover the cost of the additional line breaks
136  /// and column limit violation in all lines except for the last one. The
137  /// penalty for the column limit violation in the last line (and in single
138  /// line tokens) is handled in \c addNextStateToQueue.
139  ///
140  /// \p Strict indicates whether reflowing is allowed to leave characters
141  /// protruding the column limit; if true, lines will be split strictly within
142  /// the column limit where possible; if false, words are allowed to protrude
143  /// over the column limit as long as the penalty is less than the penalty
144  /// of a break.
145  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146                                                 LineState &State,
147                                                 bool AllowBreak, bool DryRun,
148                                                 bool Strict);
149
150  /// Returns the \c BreakableToken starting at \p Current, or nullptr
151  /// if the current token cannot be broken.
152  std::unique_ptr<BreakableToken>
153  createBreakableToken(const FormatToken &Current, LineState &State,
154                       bool AllowBreak);
155
156  /// Appends the next token to \p State and updates information
157  /// necessary for indentation.
158  ///
159  /// Puts the token on the current line.
160  ///
161  /// If \p DryRun is \c false, also creates and stores the required
162  /// \c Replacement.
163  void addTokenOnCurrentLine(LineState &State, bool DryRun,
164                             unsigned ExtraSpaces);
165
166  /// Appends the next token to \p State and updates information
167  /// necessary for indentation.
168  ///
169  /// Adds a line break and necessary indentation.
170  ///
171  /// If \p DryRun is \c false, also creates and stores the required
172  /// \c Replacement.
173  unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174
175  /// Calculate the new column for a line wrap before the next token.
176  unsigned getNewLineColumn(const LineState &State);
177
178  /// Adds a multiline token to the \p State.
179  ///
180  /// \returns Extra penalty for the first line of the literal: last line is
181  /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182  /// matter, as we don't change them.
183  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184
185  /// Returns \c true if the next token starts a multiline string
186  /// literal.
187  ///
188  /// This includes implicitly concatenated strings, strings that will be broken
189  /// by clang-format and string literals with escaped newlines.
190  bool nextIsMultilineString(const LineState &State);
191
192  FormatStyle Style;
193  const AdditionalKeywords &Keywords;
194  const SourceManager &SourceMgr;
195  WhitespaceManager &Whitespaces;
196  encoding::Encoding Encoding;
197  bool BinPackInconclusiveFunctions;
198  llvm::Regex CommentPragmasRegex;
199  const RawStringFormatStyleManager RawStringFormats;
200};
201
202struct ParenState {
203  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204             bool AvoidBinPacking, bool NoLineBreak)
205      : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
206        NestedBlockIndent(Indent), IsAligned(false),
207        BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
208        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
209        NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
210        LastOperatorWrapped(true), ContainsLineBreak(false),
211        ContainsUnwrappedBuilder(false), AlignColons(true),
212        ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
213        NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
214        IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
215        IsWrappedConditional(false), UnindentOperator(false) {}
216
217  /// \brief The token opening this parenthesis level, or nullptr if this level
218  /// is opened by fake parenthesis.
219  ///
220  /// Not considered for memoization as it will always have the same value at
221  /// the same token.
222  const FormatToken *Tok;
223
224  /// The position to which a specific parenthesis level needs to be
225  /// indented.
226  unsigned Indent;
227
228  /// The position of the last space on each level.
229  ///
230  /// Used e.g. to break like:
231  /// functionCall(Parameter, otherCall(
232  ///                             OtherParameter));
233  unsigned LastSpace;
234
235  /// If a block relative to this parenthesis level gets wrapped, indent
236  /// it this much.
237  unsigned NestedBlockIndent;
238
239  /// The position the first "<<" operator encountered on each level.
240  ///
241  /// Used to align "<<" operators. 0 if no such operator has been encountered
242  /// on a level.
243  unsigned FirstLessLess = 0;
244
245  /// The column of a \c ? in a conditional expression;
246  unsigned QuestionColumn = 0;
247
248  /// The position of the colon in an ObjC method declaration/call.
249  unsigned ColonPos = 0;
250
251  /// The start of the most recent function in a builder-type call.
252  unsigned StartOfFunctionCall = 0;
253
254  /// Contains the start of array subscript expressions, so that they
255  /// can be aligned.
256  unsigned StartOfArraySubscripts = 0;
257
258  /// If a nested name specifier was broken over multiple lines, this
259  /// contains the start column of the second line. Otherwise 0.
260  unsigned NestedNameSpecifierContinuation = 0;
261
262  /// If a call expression was broken over multiple lines, this
263  /// contains the start column of the second line. Otherwise 0.
264  unsigned CallContinuation = 0;
265
266  /// The column of the first variable name in a variable declaration.
267  ///
268  /// Used to align further variables if necessary.
269  unsigned VariablePos = 0;
270
271  /// Whether this block's indentation is used for alignment.
272  bool IsAligned : 1;
273
274  /// Whether a newline needs to be inserted before the block's closing
275  /// brace.
276  ///
277  /// We only want to insert a newline before the closing brace if there also
278  /// was a newline after the beginning left brace.
279  bool BreakBeforeClosingBrace : 1;
280
281  /// Whether a newline needs to be inserted before the block's closing
282  /// paren.
283  ///
284  /// We only want to insert a newline before the closing paren if there also
285  /// was a newline after the beginning left paren.
286  bool BreakBeforeClosingParen : 1;
287
288  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
289  /// lines, in this context.
290  bool AvoidBinPacking : 1;
291
292  /// Break after the next comma (or all the commas in this context if
293  /// \c AvoidBinPacking is \c true).
294  bool BreakBeforeParameter : 1;
295
296  /// Line breaking in this context would break a formatting rule.
297  bool NoLineBreak : 1;
298
299  /// Same as \c NoLineBreak, but is restricted until the end of the
300  /// operand (including the next ",").
301  bool NoLineBreakInOperand : 1;
302
303  /// True if the last binary operator on this level was wrapped to the
304  /// next line.
305  bool LastOperatorWrapped : 1;
306
307  /// \c true if this \c ParenState already contains a line-break.
308  ///
309  /// The first line break in a certain \c ParenState causes extra penalty so
310  /// that clang-format prefers similar breaks, i.e. breaks in the same
311  /// parenthesis.
312  bool ContainsLineBreak : 1;
313
314  /// \c true if this \c ParenState contains multiple segments of a
315  /// builder-type call on one line.
316  bool ContainsUnwrappedBuilder : 1;
317
318  /// \c true if the colons of the curren ObjC method expression should
319  /// be aligned.
320  ///
321  /// Not considered for memoization as it will always have the same value at
322  /// the same token.
323  bool AlignColons : 1;
324
325  /// \c true if at least one selector name was found in the current
326  /// ObjC method expression.
327  ///
328  /// Not considered for memoization as it will always have the same value at
329  /// the same token.
330  bool ObjCSelectorNameFound : 1;
331
332  /// \c true if there are multiple nested blocks inside these parens.
333  ///
334  /// Not considered for memoization as it will always have the same value at
335  /// the same token.
336  bool HasMultipleNestedBlocks : 1;
337
338  /// The start of a nested block (e.g. lambda introducer in C++ or
339  /// "function" in JavaScript) is not wrapped to a new line.
340  bool NestedBlockInlined : 1;
341
342  /// \c true if the current \c ParenState represents an Objective-C
343  /// array literal.
344  bool IsInsideObjCArrayLiteral : 1;
345
346  bool IsCSharpGenericTypeConstraint : 1;
347
348  /// \brief true if the current \c ParenState represents the false branch of
349  /// a chained conditional expression (e.g. else-if)
350  bool IsChainedConditional : 1;
351
352  /// \brief true if there conditionnal was wrapped on the first operator (the
353  /// question mark)
354  bool IsWrappedConditional : 1;
355
356  /// \brief Indicates the indent should be reduced by the length of the
357  /// operator.
358  bool UnindentOperator : 1;
359
360  bool operator<(const ParenState &Other) const {
361    if (Indent != Other.Indent)
362      return Indent < Other.Indent;
363    if (LastSpace != Other.LastSpace)
364      return LastSpace < Other.LastSpace;
365    if (NestedBlockIndent != Other.NestedBlockIndent)
366      return NestedBlockIndent < Other.NestedBlockIndent;
367    if (FirstLessLess != Other.FirstLessLess)
368      return FirstLessLess < Other.FirstLessLess;
369    if (IsAligned != Other.IsAligned)
370      return IsAligned;
371    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
372      return BreakBeforeClosingBrace;
373    if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
374      return BreakBeforeClosingParen;
375    if (QuestionColumn != Other.QuestionColumn)
376      return QuestionColumn < Other.QuestionColumn;
377    if (AvoidBinPacking != Other.AvoidBinPacking)
378      return AvoidBinPacking;
379    if (BreakBeforeParameter != Other.BreakBeforeParameter)
380      return BreakBeforeParameter;
381    if (NoLineBreak != Other.NoLineBreak)
382      return NoLineBreak;
383    if (LastOperatorWrapped != Other.LastOperatorWrapped)
384      return LastOperatorWrapped;
385    if (ColonPos != Other.ColonPos)
386      return ColonPos < Other.ColonPos;
387    if (StartOfFunctionCall != Other.StartOfFunctionCall)
388      return StartOfFunctionCall < Other.StartOfFunctionCall;
389    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
390      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
391    if (CallContinuation != Other.CallContinuation)
392      return CallContinuation < Other.CallContinuation;
393    if (VariablePos != Other.VariablePos)
394      return VariablePos < Other.VariablePos;
395    if (ContainsLineBreak != Other.ContainsLineBreak)
396      return ContainsLineBreak;
397    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
398      return ContainsUnwrappedBuilder;
399    if (NestedBlockInlined != Other.NestedBlockInlined)
400      return NestedBlockInlined;
401    if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
402      return IsCSharpGenericTypeConstraint;
403    if (IsChainedConditional != Other.IsChainedConditional)
404      return IsChainedConditional;
405    if (IsWrappedConditional != Other.IsWrappedConditional)
406      return IsWrappedConditional;
407    if (UnindentOperator != Other.UnindentOperator)
408      return UnindentOperator;
409    return false;
410  }
411};
412
413/// The current state when indenting a unwrapped line.
414///
415/// As the indenting tries different combinations this is copied by value.
416struct LineState {
417  /// The number of used columns in the current line.
418  unsigned Column;
419
420  /// The token that needs to be next formatted.
421  FormatToken *NextToken;
422
423  /// \c true if \p NextToken should not continue this line.
424  bool NoContinuation;
425
426  /// The \c NestingLevel at the start of this line.
427  unsigned StartOfLineLevel;
428
429  /// The lowest \c NestingLevel on the current line.
430  unsigned LowestLevelOnLine;
431
432  /// The start column of the string literal, if we're in a string
433  /// literal sequence, 0 otherwise.
434  unsigned StartOfStringLiteral;
435
436  /// Disallow line breaks for this line.
437  bool NoLineBreak;
438
439  /// A stack keeping track of properties applying to parenthesis
440  /// levels.
441  SmallVector<ParenState> Stack;
442
443  /// Ignore the stack of \c ParenStates for state comparison.
444  ///
445  /// In long and deeply nested unwrapped lines, the current algorithm can
446  /// be insufficient for finding the best formatting with a reasonable amount
447  /// of time and memory. Setting this flag will effectively lead to the
448  /// algorithm not analyzing some combinations. However, these combinations
449  /// rarely contain the optimal solution: In short, accepting a higher
450  /// penalty early would need to lead to different values in the \c
451  /// ParenState stack (in an otherwise identical state) and these different
452  /// values would need to lead to a significant amount of avoided penalty
453  /// later.
454  ///
455  /// FIXME: Come up with a better algorithm instead.
456  bool IgnoreStackForComparison;
457
458  /// The indent of the first token.
459  unsigned FirstIndent;
460
461  /// The line that is being formatted.
462  ///
463  /// Does not need to be considered for memoization because it doesn't change.
464  const AnnotatedLine *Line;
465
466  /// Comparison operator to be able to used \c LineState in \c map.
467  bool operator<(const LineState &Other) const {
468    if (NextToken != Other.NextToken)
469      return NextToken < Other.NextToken;
470    if (Column != Other.Column)
471      return Column < Other.Column;
472    if (NoContinuation != Other.NoContinuation)
473      return NoContinuation;
474    if (StartOfLineLevel != Other.StartOfLineLevel)
475      return StartOfLineLevel < Other.StartOfLineLevel;
476    if (LowestLevelOnLine != Other.LowestLevelOnLine)
477      return LowestLevelOnLine < Other.LowestLevelOnLine;
478    if (StartOfStringLiteral != Other.StartOfStringLiteral)
479      return StartOfStringLiteral < Other.StartOfStringLiteral;
480    if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
481      return false;
482    return Stack < Other.Stack;
483  }
484};
485
486} // end namespace format
487} // end namespace clang
488
489#endif
490