//===--- BreakableToken.h - Format C++ code -------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares BreakableToken and its subclasses BreakableSingleLineToken,
/// BreakableStringLiteral, BreakableLineComment, and BreakableBlockComment,
/// which contain token type-specific logic to break long lines in tokens.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H

#include "Encoding.h"
#include "TokenAnnotator.h"
#include "WhitespaceManager.h"
#include <utility>

namespace clang {
namespace format {

struct FormatStyle;

301539Srgrimes/// \brief Base class for strategies on how to break tokens.
311539Srgrimes///
321539Srgrimes/// FIXME: The interface seems set in stone, so we might want to just pull the
331539Srgrimes/// strategy into the class, instead of controlling it from the outside.
3493032Simpclass BreakableToken {
351539Srgrimespublic:
361539Srgrimes  /// \brief Contains starting character index and length of split.
371539Srgrimes  typedef std::pair<StringRef::size_type, unsigned> Split;
381539Srgrimes
391539Srgrimes  virtual ~BreakableToken() {}
401539Srgrimes
41104356Smike  /// \brief Returns the number of lines in this token in the original code.
421539Srgrimes  virtual unsigned getLineCount() const = 0;
431539Srgrimes
44104356Smike  /// \brief Returns the number of columns required to format the piece of line
451539Srgrimes  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46104356Smike  ///
47104356Smike  /// Note that previous breaks are not taken into account. \p Offset is always
48104356Smike  /// specified from the start of the (original) line.
49104356Smike  /// \p Length can be set to StringRef::npos, which means "to the end of line".
50104356Smike  virtual unsigned
511539Srgrimes  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
521539Srgrimes                          StringRef::size_type Length) const = 0;
531539Srgrimes
541539Srgrimes  /// \brief Returns a range (offset, length) at which to break the line at
551539Srgrimes  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
561539Srgrimes  /// violate \p ColumnLimit.
571539Srgrimes  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
581539Srgrimes                         unsigned ColumnLimit) const = 0;
591539Srgrimes
601539Srgrimes  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
611539Srgrimes  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
621539Srgrimes                           WhitespaceManager &Whitespaces) = 0;
631539Srgrimes
641539Srgrimes  /// \brief Replaces the whitespace range described by \p Split with a single
651539Srgrimes  /// space.
661539Srgrimes  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
671539Srgrimes                                 Split Split,
681539Srgrimes                                 WhitespaceManager &Whitespaces) = 0;
691539Srgrimes
701539Srgrimes  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
711539Srgrimes  virtual void replaceWhitespaceBefore(unsigned LineIndex,
721539Srgrimes                                       WhitespaceManager &Whitespaces) {}
731539Srgrimes
74123636Sjkhprotected:
751539Srgrimes  BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
761539Srgrimes                 bool InPPDirective, encoding::Encoding Encoding,
771539Srgrimes                 const FormatStyle &Style)
781539Srgrimes      : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
791539Srgrimes        Encoding(Encoding), Style(Style) {}
801539Srgrimes
811539Srgrimes  const FormatToken &Tok;
821539Srgrimes  const unsigned IndentLevel;
831539Srgrimes  const bool InPPDirective;
841539Srgrimes  const encoding::Encoding Encoding;
851539Srgrimes  const FormatStyle &Style;
861539Srgrimes};
871539Srgrimes
881539Srgrimes/// \brief Base class for single line tokens that can be broken.
891539Srgrimes///
901539Srgrimes/// \c getSplit() needs to be implemented by child classes.
91132017Stjrclass BreakableSingleLineToken : public BreakableToken {
921539Srgrimespublic:
931539Srgrimes  virtual unsigned getLineCount() const;
941539Srgrimes  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
951539Srgrimes                                           unsigned TailOffset,
961539Srgrimes                                           StringRef::size_type Length) const;
971539Srgrimes
981539Srgrimesprotected:
991539Srgrimes  BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
1001539Srgrimes                           unsigned StartColumn, StringRef Prefix,
1011539Srgrimes                           StringRef Postfix, bool InPPDirective,
1021539Srgrimes                           encoding::Encoding Encoding,
1031539Srgrimes                           const FormatStyle &Style);
104104356Smike
105104356Smike  // The column in which the token starts.
106104416Smike  unsigned StartColumn;
107104416Smike  // The prefix a line needs after a break in the token.
108104416Smike  StringRef Prefix;
109104416Smike  // The postfix a line needs before introducing a break.
110104416Smike  StringRef Postfix;
111104356Smike  // The token text excluding the prefix and postfix.
112104416Smike  StringRef Line;
11393032Simp};
1141539Srgrimes
1151539Srgrimesclass BreakableStringLiteral : public BreakableSingleLineToken {
1161539Srgrimespublic:
117  /// \brief Creates a breakable token for a single line string literal.
118  ///
119  /// \p StartColumn specifies the column in which the token will start
120  /// after formatting.
121  BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
122                         unsigned StartColumn, StringRef Prefix,
123                         StringRef Postfix, bool InPPDirective,
124                         encoding::Encoding Encoding, const FormatStyle &Style);
125
126  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
127                         unsigned ColumnLimit) const;
128  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
129                           WhitespaceManager &Whitespaces);
130  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
131                                 Split Split,
132                                 WhitespaceManager &Whitespaces) {}
133};
134
135class BreakableLineComment : public BreakableSingleLineToken {
136public:
137  /// \brief Creates a breakable token for a line comment.
138  ///
139  /// \p StartColumn specifies the column in which the comment will start
140  /// after formatting.
141  BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
142                       unsigned StartColumn, bool InPPDirective,
143                       encoding::Encoding Encoding, const FormatStyle &Style);
144
145  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
146                         unsigned ColumnLimit) const;
147  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
148                           WhitespaceManager &Whitespaces);
149  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
150                                 Split Split,
151                                 WhitespaceManager &Whitespaces);
152  virtual void replaceWhitespaceBefore(unsigned LineIndex,
153                                       WhitespaceManager &Whitespaces);
154
155private:
156  // The prefix without an additional space if one was added.
157  StringRef OriginalPrefix;
158};
159
160class BreakableBlockComment : public BreakableToken {
161public:
162  /// \brief Creates a breakable token for a block comment.
163  ///
164  /// \p StartColumn specifies the column in which the comment will start
165  /// after formatting, while \p OriginalStartColumn specifies in which
166  /// column the comment started before formatting.
167  /// If the comment starts a line after formatting, set \p FirstInLine to true.
168  BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
169                        unsigned StartColumn, unsigned OriginaStartColumn,
170                        bool FirstInLine, bool InPPDirective,
171                        encoding::Encoding Encoding, const FormatStyle &Style);
172
173  virtual unsigned getLineCount() const;
174  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
175                                           unsigned TailOffset,
176                                           StringRef::size_type Length) const;
177  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
178                         unsigned ColumnLimit) const;
179  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
180                           WhitespaceManager &Whitespaces);
181  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
182                                 Split Split,
183                                 WhitespaceManager &Whitespaces);
184  virtual void replaceWhitespaceBefore(unsigned LineIndex,
185                                       WhitespaceManager &Whitespaces);
186
187private:
188  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
189  // so that all whitespace between the lines is accounted to Lines[LineIndex]
190  // as leading whitespace:
191  // - Lines[LineIndex] points to the text after that whitespace
192  // - Lines[LineIndex-1] shrinks by its trailing whitespace
193  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
194  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
195  //
196  // Sets StartOfLineColumn to the intended column in which the text at
197  // Lines[LineIndex] starts (note that the decoration, if present, is not
198  // considered part of the text).
199  void adjustWhitespace(unsigned LineIndex, int IndentDelta);
200
201  // Returns the column at which the text in line LineIndex starts, when broken
202  // at TailOffset. Note that the decoration (if present) is not considered part
203  // of the text.
204  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
205
206  // Contains the text of the lines of the block comment, excluding the leading
207  // /* in the first line and trailing */ in the last line, and excluding all
208  // trailing whitespace between the lines. Note that the decoration (if
209  // present) is also not considered part of the text.
210  SmallVector<StringRef, 16> Lines;
211
212  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
213  // front of Lines[i]. Note that this can include "* " sequences, which we
214  // regard as whitespace when all lines have a "*" prefix.
215  SmallVector<unsigned, 16> LeadingWhitespace;
216
217  // StartOfLineColumn[i] is the target column at which Line[i] should be.
218  // Note that this excludes a leading "* " or "*" in case all lines have
219  // a "*" prefix.
220  SmallVector<unsigned, 16> StartOfLineColumn;
221
222  // The column at which the text of a broken line should start.
223  // Note that an optional decoration would go before that column.
224  // IndentAtLineBreak is a uniform position for all lines in a block comment,
225  // regardless of their relative position.
226  // FIXME: Revisit the decision to do this; the main reason was to support
227  // patterns like
228  // /**************//**
229  //  * Comment
230  // We could also support such patterns by special casing the first line
231  // instead.
232  unsigned IndentAtLineBreak;
233
234  // This is to distinguish between the case when the last line was empty and
235  // the case when it started with a decoration ("*" or "* ").
236  bool LastLineNeedsDecoration;
237
238  // Either "* " if all lines begin with a "*", or empty.
239  StringRef Decoration;
240};
241
} // namespace format
} // namespace clang

#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H