BreakableToken.h revision 263508
1//===--- BreakableToken.h - Format C++ code -------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Declares BreakableToken, BreakableStringLiteral, and
12/// BreakableBlockComment classes, that contain token type-specific logic to
13/// break long lines in tokens.
14///
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
18#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
19
20#include "Encoding.h"
21#include "TokenAnnotator.h"
22#include "WhitespaceManager.h"
23#include <utility>
24
25namespace clang {
26namespace format {
27
28struct FormatStyle;
29
30/// \brief Base class for strategies on how to break tokens.
31///
/// The class is abstract: getLineCount(), getLineLengthAfterSplit(),
/// getSplit(), insertBreak() and replaceWhitespace() are pure virtual and have
/// to be provided by derived classes. Only replaceWhitespaceBefore() comes
/// with a default (empty) implementation. The constructor is protected, so
/// objects are only created through derived classes.
///
32/// FIXME: The interface seems set in stone, so we might want to just pull the
33/// strategy into the class, instead of controlling it from the outside.
34class BreakableToken {
35public:
36  /// \brief Contains starting character index and length of split.
  ///
  /// \c first is the starting index, \c second is the length.
37  typedef std::pair<StringRef::size_type, unsigned> Split;
38
  /// \brief Virtual, so that a derived token can be destroyed through a
  /// pointer to \c BreakableToken.
39  virtual ~BreakableToken() {}
40
41  /// \brief Returns the number of lines in this token in the original code.
42  virtual unsigned getLineCount() const = 0;
43
44  /// \brief Returns the number of columns required to format the piece of line
45  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46  ///
47  /// Note that previous breaks are not taken into account. \p Offset is always
48  /// specified from the start of the (original) line.
49  /// \p Length can be set to StringRef::npos, which means "to the end of line".
50  virtual unsigned
51  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                          StringRef::size_type Length) const = 0;
53
54  /// \brief Returns a range (offset, length) at which to break the line at
55  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56  /// violate \p ColumnLimit.
  ///
  /// NOTE(review): the value returned when no suitable split exists is not
  /// specified in this header - confirm against the implementations.
57  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                         unsigned ColumnLimit) const = 0;
59
60  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
  ///
  /// \p LineIndex and \p TailOffset identify the line and the offset within
  /// it that the \p Split refers to (the same values that were passed to
  /// getSplit()).
61  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                           WhitespaceManager &Whitespaces) = 0;
63
64  /// \brief Replaces the whitespace range described by \p Split with a single
65  /// space.
66  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
67                                 Split Split,
68                                 WhitespaceManager &Whitespaces) = 0;
69
70  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
  ///
  /// The default implementation does nothing; derived classes that need to
  /// touch the whitespace between lines override it.
71  virtual void replaceWhitespaceBefore(unsigned LineIndex,
72                                       WhitespaceManager &Whitespaces) {}
73
74protected:
  /// \brief Stores the construction arguments in the members of the same
  /// name. \p Tok and \p Style are kept as references, not copied.
75  BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
76                 bool InPPDirective, encoding::Encoding Encoding,
77                 const FormatStyle &Style)
78      : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
79        Encoding(Encoding), Style(Style) {}
80
  // State captured at construction. Every member is const; Tok and Style are
  // references, so the objects they refer to must outlive this object.
81  const FormatToken &Tok;
82  const unsigned IndentLevel;
83  const bool InPPDirective;
84  const encoding::Encoding Encoding;
85  const FormatStyle &Style;
86};
87
88/// \brief Base class for single line tokens that can be broken.
89///
/// Provides getLineCount() and getLineLengthAfterSplit() on top of
/// \c BreakableToken; their definitions are not part of this header.
///
90/// \c getSplit() needs to be implemented by child classes.
/// The same holds for \c insertBreak() and \c replaceWhitespace(), which are
/// not declared here and therefore stay pure virtual.
91class BreakableSingleLineToken : public BreakableToken {
92public:
93  virtual unsigned getLineCount() const;
94  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
95                                           unsigned TailOffset,
96                                           StringRef::size_type Length) const;
97
98protected:
  /// \brief Only callable from derived classes.
  ///
  /// NOTE(review): presumably initializes StartColumn, Prefix and Postfix from
  /// the arguments of the same name and derives Line from the token text -
  /// the definition is not visible here, confirm in the implementation file.
99  BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
100                           unsigned StartColumn, StringRef Prefix,
101                           StringRef Postfix, bool InPPDirective,
102                           encoding::Encoding Encoding,
103                           const FormatStyle &Style);
104
105  // The column in which the token starts.
106  unsigned StartColumn;
107  // The prefix a line needs after a break in the token.
108  StringRef Prefix;
109  // The postfix a line needs before introducing a break.
110  StringRef Postfix;
111  // The token text excluding the prefix and postfix.
112  StringRef Line;
113};
114
/// \brief Breakable token for a single line string literal.
///
/// Supplies getSplit() and insertBreak(); replaceWhitespace() is an empty
/// function here, and replaceWhitespaceBefore() is not overridden, so the
/// empty default from \c BreakableToken applies.
115class BreakableStringLiteral : public BreakableSingleLineToken {
116public:
117  /// \brief Creates a breakable token for a single line string literal.
118  ///
119  /// \p StartColumn specifies the column in which the token will start
120  /// after formatting.
  ///
  /// The remaining arguments have the same names and types as the parameters
  /// of the \c BreakableSingleLineToken constructor.
121  BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
122                         unsigned StartColumn, StringRef Prefix,
123                         StringRef Postfix, bool InPPDirective,
124                         encoding::Encoding Encoding, const FormatStyle &Style);
125
126  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
127                         unsigned ColumnLimit) const;
128  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
129                           WhitespaceManager &Whitespaces);
  /// \brief Intentionally empty: this class performs no whitespace
  /// replacement for the given \p Split.
130  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
131                                 Split Split,
132                                 WhitespaceManager &Whitespaces) {}
133};
134
/// \brief Breakable token for a line comment.
///
/// Overrides getSplit(), insertBreak(), replaceWhitespace() and
/// replaceWhitespaceBefore(); getLineCount() and getLineLengthAfterSplit()
/// are inherited from \c BreakableSingleLineToken.
135class BreakableLineComment : public BreakableSingleLineToken {
136public:
137  /// \brief Creates a breakable token for a line comment.
138  ///
139  /// \p StartColumn specifies the column in which the comment will start
140  /// after formatting.
  ///
  /// Unlike \c BreakableStringLiteral, no prefix or postfix is passed in.
  /// NOTE(review): presumably the prefix is determined from the token text -
  /// confirm in the implementation file.
141  BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
142                       unsigned StartColumn, bool InPPDirective,
143                       encoding::Encoding Encoding, const FormatStyle &Style);
144
145  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
146                         unsigned ColumnLimit) const;
147  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
148                           WhitespaceManager &Whitespaces);
149  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
150                                 Split Split,
151                                 WhitespaceManager &Whitespaces);
152  virtual void replaceWhitespaceBefore(unsigned LineIndex,
153                                       WhitespaceManager &Whitespaces);
154
155private:
156  // The prefix without an additional space if one was added.
157  StringRef OriginalPrefix;
158};
159
/// \brief Breakable token for a block comment, which may span several lines.
///
/// Implements the complete \c BreakableToken interface. The comment text is
/// kept per line in \c Lines, together with per-line whitespace and column
/// bookkeeping (see the private members).
160class BreakableBlockComment : public BreakableToken {
161public:
162  /// \brief Creates a breakable token for a block comment.
163  ///
164  /// \p StartColumn specifies the column in which the comment will start
165  /// after formatting, while \p OriginalStartColumn specifies in which
166  /// column the comment started before formatting.
167  /// If the comment starts a line after formatting, set \p FirstInLine to true.
168  BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
169                        unsigned StartColumn, unsigned OriginalStartColumn,
170                        bool FirstInLine, bool InPPDirective,
171                        encoding::Encoding Encoding, const FormatStyle &Style);
172
173  virtual unsigned getLineCount() const;
174  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
175                                           unsigned TailOffset,
176                                           StringRef::size_type Length) const;
177  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
178                         unsigned ColumnLimit) const;
179  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
180                           WhitespaceManager &Whitespaces);
181  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
182                                 Split Split,
183                                 WhitespaceManager &Whitespaces);
184  virtual void replaceWhitespaceBefore(unsigned LineIndex,
185                                       WhitespaceManager &Whitespaces);
186
187private:
188  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
189  // so that all whitespace between the lines is accounted to Lines[LineIndex]
190  // as leading whitespace:
191  // - Lines[LineIndex] points to the text after that whitespace
192  // - Lines[LineIndex-1] shrinks by its trailing whitespace
193  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
194  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
195  //
196  // Sets StartOfLineColumn to the intended column in which the text at
197  // Lines[LineIndex] starts (note that the decoration, if present, is not
198  // considered part of the text).
  //
  // NOTE(review): IndentDelta is presumably the signed difference between the
  // formatted and the original start column - confirm in the implementation.
199  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
200
201  // Returns the column at which the text in line LineIndex starts, when broken
202  // at TailOffset. Note that the decoration (if present) is not considered part
203  // of the text.
204  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
205
206  // Contains the text of the lines of the block comment, excluding the leading
207  // /* in the first line and trailing */ in the last line, and excluding all
208  // trailing whitespace between the lines. Note that the decoration (if
209  // present) is also not considered part of the text.
210  SmallVector<StringRef, 16> Lines;
211
212  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
213  // front of Lines[i]. Note that this can include "* " sequences, which we
214  // regard as whitespace when all lines have a "*" prefix.
215  SmallVector<unsigned, 16> LeadingWhitespace;
216
217  // StartOfLineColumn[i] is the target column at which Lines[i] should be.
218  // Note that this excludes a leading "* " or "*" in case all lines have
219  // a "*" prefix.
220  SmallVector<unsigned, 16> StartOfLineColumn;
221
222  // The column at which the text of a broken line should start.
223  // Note that an optional decoration would go before that column.
224  // IndentAtLineBreak is a uniform position for all lines in a block comment,
225  // regardless of their relative position.
226  // FIXME: Revisit the decision to do this; the main reason was to support
227  // patterns like
228  // /**************//**
229  //  * Comment
230  // We could also support such patterns by special casing the first line
231  // instead.
232  unsigned IndentAtLineBreak;
233
234  // This is to distinguish between the case when the last line was empty and
235  // the case when it started with a decoration ("*" or "* ").
236  bool LastLineNeedsDecoration;
237
238  // Either "* " if all lines begin with a "*", or empty.
239  StringRef Decoration;
240};
241
242} // namespace format
243} // namespace clang
244
245#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
246