1//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a token annotator, i.e. creates
11/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17
18#include "UnwrappedLineParser.h"
19#include "clang/Format/Format.h"
20
21namespace clang {
22namespace format {
23
/// Classification assigned to an annotated line.
///
/// The enumerators keep their implicit values (starting at 0), so their order
/// must not be changed.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
36
/// Kind of scope a token is contained in.
enum ScopeType {
  /// Contained in class declaration/definition.
  ST_Class,
  /// Contained within function definition.
  ST_Function,
  /// Contained within other scope block (loop, if/else, etc).
  ST_Other,
};
45
46class AnnotatedLine {
47public:
48  AnnotatedLine(const UnwrappedLine &Line)
49      : First(Line.Tokens.front().Tok), Level(Line.Level),
50        PPLevel(Line.PPLevel),
51        MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
52        MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
53        InPPDirective(Line.InPPDirective),
54        InPragmaDirective(Line.InPragmaDirective),
55        InMacroBody(Line.InMacroBody),
56        MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
57        IsMultiVariableDeclStmt(false), Affected(false),
58        LeadingEmptyLinesAffected(false), ChildrenAffected(false),
59        ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
60        FirstStartColumn(Line.FirstStartColumn) {
61    assert(!Line.Tokens.empty());
62
63    // Calculate Next and Previous for all tokens. Note that we must overwrite
64    // Next and Previous for every token, as previous formatting runs might have
65    // left them in a different state.
66    First->Previous = nullptr;
67    FormatToken *Current = First;
68    addChildren(Line.Tokens.front(), Current);
69    for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
70      if (Node.Tok->MacroParent)
71        ContainsMacroCall = true;
72      Current->Next = Node.Tok;
73      Node.Tok->Previous = Current;
74      Current = Current->Next;
75      addChildren(Node, Current);
76      // FIXME: if we add children, previous will point to the token before
77      // the children; changing this requires significant changes across
78      // clang-format.
79    }
80    Last = Current;
81    Last->Next = nullptr;
82  }
83
84  void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
85    Current->Children.clear();
86    for (const auto &Child : Node.Children) {
87      Children.push_back(new AnnotatedLine(Child));
88      if (Children.back()->ContainsMacroCall)
89        ContainsMacroCall = true;
90      Current->Children.push_back(Children.back());
91    }
92  }
93
94  size_t size() const {
95    size_t Size = 1;
96    for (const auto *Child : Children)
97      Size += Child->size();
98    return Size;
99  }
100
101  ~AnnotatedLine() {
102    for (AnnotatedLine *Child : Children)
103      delete Child;
104    FormatToken *Current = First;
105    while (Current) {
106      Current->Children.clear();
107      Current->Role.reset();
108      Current = Current->Next;
109    }
110  }
111
112  bool isComment() const {
113    return First && First->is(tok::comment) && !First->getNextNonComment();
114  }
115
116  /// \c true if this line starts with the given tokens in order, ignoring
117  /// comments.
118  template <typename... Ts> bool startsWith(Ts... Tokens) const {
119    return First && First->startsSequence(Tokens...);
120  }
121
122  /// \c true if this line ends with the given tokens in reversed order,
123  /// ignoring comments.
124  /// For example, given tokens [T1, T2, T3, ...], the function returns true if
125  /// this line is like "... T3 T2 T1".
126  template <typename... Ts> bool endsWith(Ts... Tokens) const {
127    return Last && Last->endsSequence(Tokens...);
128  }
129
130  /// \c true if this line looks like a function definition instead of a
131  /// function declaration. Asserts MightBeFunctionDecl.
132  bool mightBeFunctionDefinition() const {
133    assert(MightBeFunctionDecl);
134    // Try to determine if the end of a stream of tokens is either the
135    // Definition or the Declaration for a function. It does this by looking for
136    // the ';' in foo(); and using that it ends with a ; to know this is the
137    // Definition, however the line could end with
138    //    foo(); /* comment */
139    // or
140    //    foo(); // comment
141    // or
142    //    foo() // comment
143    // endsWith() ignores the comment.
144    return !endsWith(tok::semi);
145  }
146
147  /// \c true if this line starts a namespace definition.
148  bool startsWithNamespace() const {
149    return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
150           startsWith(tok::kw_inline, tok::kw_namespace) ||
151           startsWith(tok::kw_export, tok::kw_namespace);
152  }
153
154  FormatToken *getFirstNonComment() const {
155    assert(First);
156    return First->is(tok::comment) ? First->getNextNonComment() : First;
157  }
158
159  FormatToken *getLastNonComment() const {
160    assert(Last);
161    return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
162  }
163
164  FormatToken *First;
165  FormatToken *Last;
166
167  SmallVector<AnnotatedLine *, 0> Children;
168
169  LineType Type;
170  unsigned Level;
171  unsigned PPLevel;
172  size_t MatchingOpeningBlockLineIndex;
173  size_t MatchingClosingBlockLineIndex;
174  bool InPPDirective;
175  bool InPragmaDirective;
176  bool InMacroBody;
177  bool MustBeDeclaration;
178  bool MightBeFunctionDecl;
179  bool IsMultiVariableDeclStmt;
180
181  /// \c True if this line contains a macro call for which an expansion exists.
182  bool ContainsMacroCall = false;
183
184  /// \c True if this line should be formatted, i.e. intersects directly or
185  /// indirectly with one of the input ranges.
186  bool Affected;
187
188  /// \c True if the leading empty lines of this line intersect with one of the
189  /// input ranges.
190  bool LeadingEmptyLinesAffected;
191
192  /// \c True if one of this line's children intersects with an input range.
193  bool ChildrenAffected;
194
195  /// \c True if breaking after last attribute group in function return type.
196  bool ReturnTypeWrapped;
197
198  /// \c True if this line should be indented by ContinuationIndent in addition
199  /// to the normal indention level.
200  bool IsContinuation;
201
202  unsigned FirstStartColumn;
203
204private:
205  // Disallow copying.
206  AnnotatedLine(const AnnotatedLine &) = delete;
207  void operator=(const AnnotatedLine &) = delete;
208};
209
210/// Determines extra information about the tokens comprising an
211/// \c UnwrappedLine.
212class TokenAnnotator {
213public:
214  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
215      : Style(Style), Keywords(Keywords) {}
216
217  /// Adapts the indent levels of comment lines to the indent of the
218  /// subsequent line.
219  // FIXME: Can/should this be done in the UnwrappedLineParser?
220  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
221
222  void annotate(AnnotatedLine &Line);
223  void calculateFormattingInformation(AnnotatedLine &Line) const;
224
225private:
226  /// Calculate the penalty for splitting before \c Tok.
227  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
228                        bool InFunctionDecl) const;
229
230  bool spaceRequiredBeforeParens(const FormatToken &Right) const;
231
232  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
233                            const FormatToken &Right) const;
234
235  bool spaceRequiredBefore(const AnnotatedLine &Line,
236                           const FormatToken &Right) const;
237
238  bool mustBreakBefore(const AnnotatedLine &Line,
239                       const FormatToken &Right) const;
240
241  bool canBreakBefore(const AnnotatedLine &Line,
242                      const FormatToken &Right) const;
243
244  bool mustBreakForReturnType(const AnnotatedLine &Line) const;
245
246  void printDebugInfo(const AnnotatedLine &Line) const;
247
248  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
249
250  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
251
252  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
253                                              FormatToken *CurrentToken,
254                                              unsigned Depth) const;
255  FormatStyle::PointerAlignmentStyle
256  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
257
258  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
259      const FormatToken &PointerOrReference) const;
260
261  const FormatStyle &Style;
262
263  const AdditionalKeywords &Keywords;
264
265  SmallVector<ScopeType> Scopes;
266};
267
268} // end namespace format
269} // end namespace clang
270
271#endif
272