// BreakableToken.cpp revision 263508
1//===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief Contains implementation of BreakableToken class and classes derived 12/// from it. 13/// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "format-token-breaker" 17 18#include "BreakableToken.h" 19#include "clang/Basic/CharInfo.h" 20#include "clang/Format/Format.h" 21#include "llvm/ADT/STLExtras.h" 22#include "llvm/Support/Debug.h" 23#include <algorithm> 24 25namespace clang { 26namespace format { 27 28static const char *const Blanks = " \t\v\f\r"; 29static bool IsBlank(char C) { 30 switch (C) { 31 case ' ': 32 case '\t': 33 case '\v': 34 case '\f': 35 case '\r': 36 return true; 37 default: 38 return false; 39 } 40} 41 42static BreakableToken::Split getCommentSplit(StringRef Text, 43 unsigned ContentStartColumn, 44 unsigned ColumnLimit, 45 unsigned TabWidth, 46 encoding::Encoding Encoding) { 47 if (ColumnLimit <= ContentStartColumn + 1) 48 return BreakableToken::Split(StringRef::npos, 0); 49 50 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 51 unsigned MaxSplitBytes = 0; 52 53 for (unsigned NumChars = 0; 54 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 55 unsigned BytesInChar = 56 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 57 NumChars += 58 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 59 ContentStartColumn, TabWidth, Encoding); 60 MaxSplitBytes += BytesInChar; 61 } 62 63 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 64 if (SpaceOffset == StringRef::npos || 65 // Don't break at leading whitespace. 
66 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 67 // Make sure that we don't break at leading whitespace that 68 // reaches past MaxSplit. 69 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 70 if (FirstNonWhitespace == StringRef::npos) 71 // If the comment is only whitespace, we cannot split. 72 return BreakableToken::Split(StringRef::npos, 0); 73 SpaceOffset = Text.find_first_of( 74 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 75 } 76 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 77 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 78 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 79 return BreakableToken::Split(BeforeCut.size(), 80 AfterCut.begin() - BeforeCut.end()); 81 } 82 return BreakableToken::Split(StringRef::npos, 0); 83} 84 85static BreakableToken::Split getStringSplit(StringRef Text, 86 unsigned UsedColumns, 87 unsigned ColumnLimit, 88 unsigned TabWidth, 89 encoding::Encoding Encoding) { 90 // FIXME: Reduce unit test case. 
91 if (Text.empty()) 92 return BreakableToken::Split(StringRef::npos, 0); 93 if (ColumnLimit <= UsedColumns) 94 return BreakableToken::Split(StringRef::npos, 0); 95 unsigned MaxSplit = std::min<unsigned>( 96 ColumnLimit - UsedColumns, 97 encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); 98 StringRef::size_type SpaceOffset = 0; 99 StringRef::size_type SlashOffset = 0; 100 StringRef::size_type WordStartOffset = 0; 101 StringRef::size_type SplitPoint = 0; 102 for (unsigned Chars = 0;;) { 103 unsigned Advance; 104 if (Text[0] == '\\') { 105 Advance = encoding::getEscapeSequenceLength(Text); 106 Chars += Advance; 107 } else { 108 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 109 Chars += encoding::columnWidthWithTabs( 110 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 111 } 112 113 if (Chars > MaxSplit) 114 break; 115 116 if (IsBlank(Text[0])) 117 SpaceOffset = SplitPoint; 118 if (Text[0] == '/') 119 SlashOffset = SplitPoint; 120 if (Advance == 1 && !isAlphanumeric(Text[0])) 121 WordStartOffset = SplitPoint; 122 123 SplitPoint += Advance; 124 Text = Text.substr(Advance); 125 } 126 127 if (SpaceOffset != 0) 128 return BreakableToken::Split(SpaceOffset + 1, 0); 129 if (SlashOffset != 0) 130 return BreakableToken::Split(SlashOffset + 1, 0); 131 if (WordStartOffset != 0) 132 return BreakableToken::Split(WordStartOffset + 1, 0); 133 if (SplitPoint != 0) 134 return BreakableToken::Split(SplitPoint, 0); 135 return BreakableToken::Split(StringRef::npos, 0); 136} 137 138unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 139 140unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 141 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 142 return StartColumn + Prefix.size() + Postfix.size() + 143 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 144 StartColumn + Prefix.size(), 145 Style.TabWidth, Encoding); 146} 147 
/// \brief Stores the layout parameters and sets \c Line to the token text with
/// \p Prefix and \p Postfix stripped.
///
/// The token text is asserted to start with \p Prefix and end with
/// \p Postfix.
BreakableSingleLineToken::BreakableSingleLineToken(
    const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
    StringRef Prefix, StringRef Postfix, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style),
      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
  Line = Tok.TokenText.substr(
      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}

/// \brief Forwards all arguments unchanged to \c BreakableSingleLineToken.
BreakableStringLiteral::BreakableStringLiteral(
    const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
    StringRef Prefix, StringRef Postfix, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix,
                               InPPDirective, Encoding, Style) {}

/// \brief Computes a split for the literal content starting at \p TailOffset.
///
/// Both the prefix and the postfix are counted as used columns, since every
/// resulting piece carries both. \p LineIndex is not used.
BreakableToken::Split
BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
                                 unsigned ColumnLimit) const {
  return getStringSplit(Line.substr(TailOffset),
                        StartColumn + Prefix.size() + Postfix.size(),
                        ColumnLimit, Style.TabWidth, Encoding);
}

/// \brief Registers a line break inside the string literal at \p Split.
///
/// The offset passed on is relative to the start of the token text, hence the
/// added \c Prefix.size(). \c Postfix and \c Prefix are handed to the
/// whitespace manager together with one newline.
/// NOTE(review): presumably Postfix closes the first piece and Prefix reopens
/// the second one -- confirm against WhitespaceManager.
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                         unsigned TailOffset, Split Split,
                                         WhitespaceManager &Whitespaces) {
  Whitespaces.replaceWhitespaceInToken(
      Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
      Prefix, InPPDirective, 1, IndentLevel, StartColumn);
}

/// \brief Returns the known line comment prefix \p Comment starts with, or an
/// empty string if there is none.
///
/// The table is ordered so that longer prefixes are tested before the shorter
/// prefixes they contain; the first match wins.
static StringRef getLineCommentPrefix(StringRef Comment) {
  static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" };
  for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
    if (Comment.startswith(KnownPrefixes[i]))
      return KnownPrefixes[i];
  return "";
}

/// \brief Sets up a line comment with the prefix detected from its text and an
/// empty postfix.
///
/// The detected prefix is remembered in \c OriginalPrefix. If the character
/// directly following a "//" or "///" prefix is alphanumeric, \c Prefix is
/// extended by one space, so that \c Prefix and \c OriginalPrefix differ.
BreakableLineComment::BreakableLineComment(
    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
    bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
                               getLineCommentPrefix(Token.TokenText), "",
                               InPPDirective, Encoding, Style) {
  OriginalPrefix = Prefix;
  if (Token.TokenText.size() > Prefix.size() &&
      isAlphanumeric(Token.TokenText[Prefix.size()])) {
    if (Prefix == "//")
      Prefix = "// ";
    else if (Prefix == "///")
      Prefix = "/// ";
  }
}

/// \brief Computes a split for the comment text starting at \p TailOffset.
///
/// The content is taken to start right after the (possibly extended) prefix.
/// \p LineIndex is not used.
BreakableToken::Split
BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                               unsigned ColumnLimit) const {
  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
                         ColumnLimit, Style.TabWidth, Encoding);
}

/// \brief Registers a line break inside the line comment at \p Split.
///
/// Offsets into the token text are computed with \c OriginalPrefix, because
/// that is what the token text actually contains; the (possibly extended)
/// \c Prefix is what is passed on for the continuation line.
void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                       Split Split,
                                       WhitespaceManager &Whitespaces) {
  Whitespaces.replaceWhitespaceInToken(
      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
      Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
}

/// \brief Replaces the \c Split.second characters at \p Split with a single
/// space, without introducing a newline.
void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
                                             unsigned TailOffset, Split Split,
                                             WhitespaceManager &Whitespaces) {
  Whitespaces.replaceWhitespaceInToken(
      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
      "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
      /*Spaces=*/1);
}

/// \brief If the constructor extended the prefix, requests one space directly
/// after the original prefix (zero characters are replaced).
void
BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
                                              WhitespaceManager &Whitespaces) {
  if (OriginalPrefix != Prefix) {
    Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
                                         /*InPPDirective=*/false,
                                         /*Newlines=*/0, /*IndentLevel=*/0,
                                         /*Spaces=*/1);
  }
}

/// \brief Splits a block comment into lines and precomputes, per line, its
/// content, the amount of whitespace preceding it and its start column.
///
/// The token text is asserted to start with "/*" and end with "*/"; both
/// delimiters are excluded from \c Lines. \c Decoration starts out as "* "
/// and is shortened to the longest prefix of it shared by all relevant
/// continuation lines (possibly to the empty string).
///
/// Statement order matters here: \c adjustWhitespace() rewrites \c Lines
/// before the decoration is detected, and the decoration is stripped from
/// \c Lines only afterwards.
BreakableBlockComment::BreakableBlockComment(
    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  // Drop the leading "/*" and the trailing "*/" (4 characters in total).
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");

  // Amount by which the comment as a whole is shifted horizontally.
  int IndentDelta = StartColumn - OriginalStartColumn;
  LeadingWhitespace.resize(Lines.size());
  StartOfLineColumn.resize(Lines.size());
  // The content of the first line starts right after the "/*".
  StartOfLineColumn[0] = StartColumn + 2;
  for (size_t i = 1; i < Lines.size(); ++i)
    adjustWhitespace(i, IndentDelta);

  Decoration = "* ";
  if (Lines.size() == 1 && !FirstInLine) {
    // Comments for which FirstInLine is false can start on arbitrary column,
    // and available horizontal space can be too small to align consecutive
    // lines with the first one.
    // FIXME: We could, probably, align them to current indentation level, but
    // now we just wrap them without stars.
    Decoration = "";
  }
  for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
    // If the last line is empty, the closing "*/" will have a star.
    if (i + 1 == e && Lines[i].empty())
      break;
    // Shorten the decoration until this line starts with it; the loop ends at
    // the latest when the decoration is empty.
    while (!Lines[i].startswith(Decoration))
      Decoration = Decoration.substr(0, Decoration.size() - 1);
  }

  LastLineNeedsDecoration = true;
  IndentAtLineBreak = StartOfLineColumn[0] + 1;
  for (size_t i = 1; i < Lines.size(); ++i) {
    if (Lines[i].empty()) {
      if (i + 1 == Lines.size()) {
        // Empty last line means that we already have a star as a part of the
        // trailing */. We also need to preserve whitespace, so that */ is
        // correctly indented.
        LastLineNeedsDecoration = false;
      } else if (Decoration.empty()) {
        // For all other lines, set the start column to 0 if they're empty, so
        // we do not insert trailing whitespace anywhere.
        StartOfLineColumn[i] = 0;
      }
      continue;
    }
    // The first line already excludes the star.
    // For all other lines, adjust the line to exclude the star and
    // (optionally) the first whitespace.
    StartOfLineColumn[i] += Decoration.size();
    Lines[i] = Lines[i].substr(Decoration.size());
    LeadingWhitespace[i] += Decoration.size();
    // Newly introduced breaks indent to the smallest content column seen.
    IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
  }
  // Keep room for the decoration; insertBreak() subtracts its size again.
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
  DEBUG({
    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
    for (size_t i = 0; i < Lines.size(); ++i) {
      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
                   << "\n";
    }
  });
}

/// \brief Trims the whitespace between line \p LineIndex - 1 and line
/// \p LineIndex out of \c Lines and records it.
///
/// Afterwards \c Lines[LineIndex - 1] has no trailing blanks,
/// \c Lines[LineIndex] has no leading blanks, \c LeadingWhitespace[LineIndex]
/// is the number of bytes between the two and \c StartOfLineColumn[LineIndex]
/// is the width of the removed leading blanks shifted by \p IndentDelta
/// (never below 0). \p LineIndex must be at least 1.
void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
                                             int IndentDelta) {
  // When in a preprocessor directive, the trailing backslash in a block comment
  // is not needed, but can serve a purpose of uniformity with necessary escaped
  // newlines outside the comment. In this case we remove it here before
  // trimming the trailing whitespace. The backslash will be re-added later when
  // inserting a line break.
  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
  if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
    --EndOfPreviousLine;

  // Calculate the end of the non-whitespace text in the previous line.
  EndOfPreviousLine =
      Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
  if (EndOfPreviousLine == StringRef::npos)
    EndOfPreviousLine = 0;
  else
    ++EndOfPreviousLine;
  // Calculate the start of the non-whitespace text in the current line.
  size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
  if (StartOfLine == StringRef::npos)
    StartOfLine = Lines[LineIndex].size();

  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
  // Adjust Lines to only contain relevant text.
  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
  // Adjust LeadingWhitespace to account for all whitespace between the lines
  // to the current line. Both StringRefs point into the same token text, so
  // the pointer difference is the number of bytes in between.
  LeadingWhitespace[LineIndex] =
      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();

  // Adjust the start column uniformly across all lines.
  StartOfLineColumn[LineIndex] = std::max<int>(
      0,
      encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
          IndentDelta);
}

/// \brief Returns the number of lines the block comment was split into.
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }

/// \brief Returns the column at which line \p LineIndex ends when only
/// \p Length bytes starting at \p Offset are kept on it.
unsigned BreakableBlockComment::getLineLengthAfterSplit(
    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
  return ContentStartColumn +
         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
                                       ContentStartColumn, Style.TabWidth,
                                       Encoding) +
         // The last line gets a "*/" postfix.
         (LineIndex + 1 == Lines.size() ? 2 : 0);
}

/// \brief Computes a split for line \p LineIndex, starting at \p TailOffset.
BreakableToken::Split
BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                                unsigned ColumnLimit) const {
  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
                         getContentStartColumn(LineIndex, TailOffset),
                         ColumnLimit, Style.TabWidth, Encoding);
}

/// \brief Registers a line break at \p Split within line \p LineIndex.
///
/// The new line is prefixed with \c Decoration, except when the split consumes
/// the complete remainder of the last line.
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                        Split Split,
                                        WhitespaceManager &Whitespaces) {
  StringRef Text = Lines[LineIndex].substr(TailOffset);
  StringRef Prefix = Decoration;
  if (LineIndex + 1 == Lines.size() &&
      Text.size() == Split.first + Split.second) {
    // For the last line we need to break before "*/", but not to add "* ".
    Prefix = "";
  }

  // Text points into the token text, so the pointer difference is the offset
  // of Text within the token.
  unsigned BreakOffsetInToken =
      Text.data() - Tok.TokenText.data() + Split.first;
  unsigned CharsToRemove = Split.second;
  assert(IndentAtLineBreak >= Decoration.size());
  // IndentAtLineBreak is the column of the content; the decoration is placed
  // in front of it, so its size is subtracted here.
  Whitespaces.replaceWhitespaceInToken(
      Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
      IndentLevel, IndentAtLineBreak - Decoration.size());
}

/// \brief Replaces the \c Split.second characters at \p Split within line
/// \p LineIndex with a single space, without introducing a newline.
void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
                                              unsigned TailOffset, Split Split,
                                              WhitespaceManager &Whitespaces) {
  StringRef Text = Lines[LineIndex].substr(TailOffset);
  unsigned BreakOffsetInToken =
      Text.data() - Tok.TokenText.data() + Split.first;
  unsigned CharsToRemove = Split.second;
  Whitespaces.replaceWhitespaceInToken(
      Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
      /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
}

/// \brief Replaces the whitespace (and decoration) in front of line
/// \p LineIndex with one newline, the indentation and a suitable prefix.
///
/// Nothing is done for the first line.
void
BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
                                               WhitespaceManager &Whitespaces) {
  if (LineIndex == 0)
    return;
  StringRef Prefix = Decoration;
  if (Lines[LineIndex].empty()) {
    if (LineIndex + 1 == Lines.size()) {
      if (!LastLineNeedsDecoration) {
        // If the last line was empty, we don't need a prefix, as the */ will
        // line up with the decoration (if it exists).
        Prefix = "";
      }
    } else if (!Decoration.empty()) {
      // For other empty lines, if we do have a decoration, adapt it to not
      // contain a trailing whitespace.
      Prefix = Prefix.substr(0, 1);
    }
  } else {
    if (StartOfLineColumn[LineIndex] == 1) {
      // This line starts immediately after the decorating *.
      Prefix = Prefix.substr(0, 1);
    }
  }

  // LeadingWhitespace counts the bytes directly in front of the line's
  // content, so stepping back by it yields the start of the range to replace.
  unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
                                     Tok.TokenText.data() -
                                     LeadingWhitespace[LineIndex];
  assert(StartOfLineColumn[LineIndex] >= Prefix.size());
  Whitespaces.replaceWhitespaceInToken(
      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
      InPPDirective, 1, IndentLevel,
      StartOfLineColumn[LineIndex] - Prefix.size());
}

/// \brief Returns the column at which the content of line \p LineIndex starts
/// when looking at the part of the line beginning at \p TailOffset.
unsigned
BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
                                             unsigned TailOffset) const {
  // If we break, we always break at the predefined indent.
  if (TailOffset != 0)
    return IndentAtLineBreak;
  return StartOfLineColumn[LineIndex];
}

} // namespace format
} // namespace clang