1115075Shmp//===--- FormatToken.cpp - Format C++ code --------------------------------===// 2115075Shmp// 3115075Shmp// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4115075Shmp// See https://llvm.org/LICENSE.txt for license information. 5115075Shmp// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6115075Shmp// 7115075Shmp//===----------------------------------------------------------------------===// 8115075Shmp/// 9115075Shmp/// \file 10115075Shmp/// This file implements specific functions of \c FormatTokens and their 11115075Shmp/// roles. 12115075Shmp/// 13115075Shmp//===----------------------------------------------------------------------===// 14115075Shmp 15115075Shmp#include "FormatToken.h" 16115075Shmp#include "ContinuationIndenter.h" 17115075Shmp#include "llvm/ADT/SmallVector.h" 18115075Shmp#include "llvm/Support/Debug.h" 19115075Shmp#include <climits> 20115075Shmp 21115075Shmpnamespace clang { 22115075Shmpnamespace format { 23115075Shmp 24115075Shmpconst char *getTokenTypeName(TokenType Type) { 25115075Shmp static const char *const TokNames[] = { 26115075Shmp#define TYPE(X) #X, 27115075Shmp LIST_TOKEN_TYPES 28300317Sjhb#undef TYPE 29115075Shmp nullptr}; 30115075Shmp 31115075Shmp if (Type < NUM_TOKEN_TYPES) 32115075Shmp return TokNames[Type]; 33115418Sru llvm_unreachable("unknown TokenType"); 34221220Sjhb return nullptr; 35115418Sru} 36115418Sru 37115418Sru// FIXME: This is copy&pasted from Sema. Put it in a common place and remove 38221220Sjhb// duplication. 39221220Sjhbbool FormatToken::isSimpleTypeSpecifier() const { 40115418Sru switch (Tok.getKind()) { 41221220Sjhb case tok::kw_short: 42221220Sjhb case tok::kw_long: 43115418Sru case tok::kw___int64: 44115418Sru case tok::kw___int128: 45115418Sru case tok::kw_signed: 46115418Sru case tok::kw_unsigned: 47115418Sru case tok::kw_void: 48115418Sru case tok::kw_char: 49129365Shmp case tok::kw_int: 50115418Sru case tok::kw_half: 51115418Sru case tok::kw_float: 52300317Sjhb case tok::kw_double: 53300317Sjhb case tok::kw___bf16: 54115418Sru case tok::kw__Float16: 55115418Sru case tok::kw___float128: 56115418Sru case tok::kw___ibm128: 57115418Sru case tok::kw_wchar_t: 58115418Sru case tok::kw_bool: 59115418Sru#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: 60115418Sru#include "clang/Basic/TransformTypeTraits.def" 61115418Sru case tok::annot_typename: 62115075Shmp case tok::kw_char8_t: 63115075Shmp case tok::kw_char16_t: 64218506Simp case tok::kw_char32_t: 65115075Shmp case tok::kw_typeof: 66115075Shmp case tok::kw_decltype: 67115075Shmp case tok::kw__Atomic: 68115075Shmp return true; 69294883Sjhibbits default: 70221220Sjhb return false; 71115075Shmp } 72115075Shmp} 73115075Shmp 74115075Shmpbool FormatToken::isTypeOrIdentifier() const { 75115075Shmp return isSimpleTypeSpecifier() || Tok.isOneOf(tok::kw_auto, tok::identifier); 76115075Shmp} 77221220Sjhb 78221220Sjhbbool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { 79221220Sjhb assert(is(tok::r_brace)); 80221220Sjhb if (!Style.Cpp11BracedListStyle || 81294883Sjhibbits Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { 82115075Shmp return false; 83294883Sjhibbits } 84221220Sjhb const auto *LBrace = MatchingParen; 85294883Sjhibbits assert(LBrace && LBrace->is(tok::l_brace)); 86221220Sjhb if (LBrace->is(BK_BracedInit)) 87115075Shmp return true; 88115418Sru if (LBrace->Previous && LBrace->Previous->is(tok::equal)) 89115418Sru return true; 90294883Sjhibbits return false; 91115418Sru} 92115418Sru 93115418Srubool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { 94115418Sru // C# Does not indent object initialisers as continuations. 95294883Sjhibbits if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) 96294883Sjhibbits return true; 97115418Sru if (is(TT_TemplateString) && opensScope()) 98115075Shmp return true; 99115075Shmp return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 100294883Sjhibbits (is(tok::l_brace) && 101115418Sru (getBlockKind() == BK_Block || is(TT_DictLiteral) || 102294883Sjhibbits (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 103115418Sru (is(tok::less) && Style.isProto()); 104129365Shmp} 105129365Shmp 106294883SjhibbitsTokenRole::~TokenRole() {} 107115418Sru 108115075Shmpvoid TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} 109115418Sru 110115075Shmpunsigned CommaSeparatedList::formatAfterToken(LineState &State, 111300317Sjhb ContinuationIndenter *Indenter, 112300317Sjhb bool DryRun) { 113300317Sjhb if (!State.NextToken || !State.NextToken->Previous) 114300317Sjhb return 0; 115115418Sru 116115418Sru if (Formats.size() <= 1) 117115418Sru return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). 118115075Shmp 119115418Sru // Ensure that we start on the opening brace. 120115075Shmp const FormatToken *LBrace = 121115418Sru State.NextToken->Previous->getPreviousNonComment(); 122115075Shmp if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || 123115418Sru LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || 124115075Shmp LBrace->Next->is(TT_DesignatedInitializerPeriod)) { 125115418Sru return 0; 126115075Shmp } 127115418Sru 128115418Sru // Calculate the number of code points we have to format this list. As the 129115418Sru // first token is already placed, we have to subtract it. 130115075Shmp unsigned RemainingCodePoints = 131115075Shmp Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; 132115075Shmp 133115075Shmp // Find the best ColumnFormat, i.e. the best number of columns to use. 134115075Shmp const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); 135115075Shmp 136115075Shmp // If no ColumnFormat can be used, the braced list would generally be 137115075Shmp // bin-packed. Add a severe penalty to this so that column layouts are 138115075Shmp // preferred if possible. 139115075Shmp if (!Format) 140115075Shmp return 10000; 141115075Shmp 142115075Shmp // Format the entire list. 143115075Shmp unsigned Penalty = 0; 144115075Shmp unsigned Column = 0; 145115075Shmp unsigned Item = 0; 146115075Shmp while (State.NextToken != LBrace->MatchingParen) { 147115075Shmp bool NewLine = false; 148115075Shmp unsigned ExtraSpaces = 0; 149300317Sjhb 150115075Shmp // If the previous token was one of our commas, we are now on the next item. 151115075Shmp if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { 152268780Struckman if (!State.NextToken->isTrailingComment()) { 153115075Shmp ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; 154115075Shmp ++Column; 155115075Shmp } 156115075Shmp ++Item; 157115075Shmp } 158115075Shmp 159115075Shmp if (Column == Format->Columns || State.NextToken->MustBreakBefore) { 160169101Sjmg Column = 0; 161169101Sjmg NewLine = true; 162169101Sjmg } 163169101Sjmg 164169101Sjmg // Place token using the continuation indenter and store the penalty. 165169101Sjmg Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); 166169101Sjmg } 167169101Sjmg return Penalty; 168169101Sjmg} 169169101Sjmg 170169101Sjmgunsigned CommaSeparatedList::formatFromToken(LineState &State, 171169101Sjmg ContinuationIndenter *Indenter, 172169101Sjmg bool DryRun) { 173169101Sjmg // Formatting with 1 Column isn't really a column layout, so we don't need the 174169101Sjmg // special logic here. We can just avoid bin packing any of the parameters. 175221220Sjhb if (Formats.size() == 1 || HasNestedBracedList) 176221220Sjhb State.Stack.back().AvoidBinPacking = true; 177221220Sjhb return 0; 178221220Sjhb} 179221220Sjhb 180221220Sjhb// Returns the lengths in code points between Begin and End (both included), 181221220Sjhb// assuming that the entire sequence is put on a single line. 182221220Sjhbstatic unsigned CodePointsBetween(const FormatToken *Begin, 183115075Shmp const FormatToken *End) { 184169075Sjmg assert(End->TotalLength >= Begin->TotalLength); 185169075Sjmg return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; 186210933Sjoel} 187169075Sjmg 188169075Sjmgvoid CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { 189210933Sjoel // FIXME: At some point we might want to do this for other lists, too. 190115075Shmp if (!Token->MatchingParen || 191115075Shmp !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { 192115075Shmp return; 193115075Shmp } 194115075Shmp 195115075Shmp // In C++11 braced list style, we should not format in columns unless they 196115418Sru // have many items (20 or more) or we allow bin-packing of function call 197115418Sru // arguments. 198115075Shmp if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && 199115075Shmp Commas.size() < 19) { 200115075Shmp return; 201115075Shmp } 202115075Shmp 203115075Shmp // Limit column layout for JavaScript array initializers to 20 or more items 204115075Shmp // for now to introduce it carefully. We can become more aggressive if this 205115075Shmp // necessary. 206115075Shmp if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) 207115075Shmp return; 208115075Shmp 209115075Shmp // Column format doesn't really make sense if we don't align after brackets. 210115075Shmp if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) 211115075Shmp return; 212115418Sru 213115418Sru FormatToken *ItemBegin = Token->Next; 214115418Sru while (ItemBegin->isTrailingComment()) 215115418Sru ItemBegin = ItemBegin->Next; 216115418Sru SmallVector<bool, 8> MustBreakBeforeItem; 217169075Sjmg 218169075Sjmg // The lengths of an item if it is put at the end of the line. This includes 219169075Sjmg // trailing comments which are otherwise ignored for column alignment. 220169075Sjmg SmallVector<unsigned, 8> EndOfLineItemLength; 221169075Sjmg MustBreakBeforeItem.reserve(Commas.size() + 1); 222221220Sjhb EndOfLineItemLength.reserve(Commas.size() + 1); 223221220Sjhb ItemLengths.reserve(Commas.size() + 1); 224221220Sjhb 225221220Sjhb bool HasSeparatingComment = false; 226169075Sjmg for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { 227221220Sjhb assert(ItemBegin); 228169075Sjmg // Skip comments on their own line. 229169075Sjmg while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { 230115075Shmp ItemBegin = ItemBegin->Next; 231115075Shmp HasSeparatingComment = i > 0; 232221220Sjhb } 233221220Sjhb 234221220Sjhb MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); 235221220Sjhb if (ItemBegin->is(tok::l_brace)) 236221220Sjhb HasNestedBracedList = true; 237221220Sjhb const FormatToken *ItemEnd = nullptr; 238221220Sjhb if (i == Commas.size()) { 239221220Sjhb ItemEnd = Token->MatchingParen; 240221220Sjhb const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); 241221220Sjhb ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); 242221220Sjhb if (Style.Cpp11BracedListStyle && 243221220Sjhb !ItemEnd->Previous->isTrailingComment()) { 244221220Sjhb // In Cpp11 braced list style, the } and possibly other subsequent 245221220Sjhb // tokens will need to stay on a line with the last element. 246221220Sjhb while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) 247221220Sjhb ItemEnd = ItemEnd->Next; 248221220Sjhb } else { 249221220Sjhb // In other braced lists styles, the "}" can be wrapped to the new line. 250221220Sjhb ItemEnd = Token->MatchingParen->Previous; 251221220Sjhb } 252221220Sjhb } else { 253221220Sjhb ItemEnd = Commas[i]; 254221220Sjhb // The comma is counted as part of the item when calculating the length. 255221220Sjhb ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 256221220Sjhb 257221220Sjhb // Consume trailing comments so the are included in EndOfLineItemLength. 258221220Sjhb if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && 259221220Sjhb ItemEnd->Next->isTrailingComment()) { 260221220Sjhb ItemEnd = ItemEnd->Next; 261221220Sjhb } 262221220Sjhb } 263221220Sjhb EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 264221220Sjhb // If there is a trailing comma in the list, the next item will start at the 265115075Shmp // closing brace. Don't create an extra item for this. 266115075Shmp if (ItemEnd->getNextNonComment() == Token->MatchingParen) 267115418Sru break; 268115418Sru ItemBegin = ItemEnd->Next; 269115075Shmp } 270115075Shmp 271115075Shmp // Don't use column layout for lists with few elements and in presence of 272115075Shmp // separating comments. 273115418Sru if (Commas.size() < 5 || HasSeparatingComment) 274115418Sru return; 275115418Sru 276115418Sru if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) 277221220Sjhb return; 278221220Sjhb 279221220Sjhb // We can never place more than ColumnLimit / 3 items in a row (because of the 280147398Sru // spaces and the comma). 281143670Simp unsigned MaxItems = Style.ColumnLimit / 3; 282143670Simp SmallVector<unsigned> MinSizeInColumn; 283143670Simp MinSizeInColumn.reserve(MaxItems); 284143670Simp for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { 285143670Simp ColumnFormat Format; 286143670Simp Format.Columns = Columns; 287147398Sru Format.ColumnSizes.resize(Columns); 288147398Sru MinSizeInColumn.assign(Columns, UINT_MAX); 289143670Simp Format.LineCount = 1; 290143670Simp bool HasRowWithSufficientColumns = false; 291221220Sjhb unsigned Column = 0; 292221220Sjhb for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { 293221220Sjhb assert(i < MustBreakBeforeItem.size()); 294221220Sjhb if (MustBreakBeforeItem[i] || Column == Columns) { 295221220Sjhb ++Format.LineCount; 296221220Sjhb Column = 0; 297221220Sjhb } 298221220Sjhb if (Column == Columns - 1) 299221220Sjhb HasRowWithSufficientColumns = true; 300221220Sjhb unsigned Length = 301221220Sjhb (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; 302221220Sjhb Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); 303221220Sjhb MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); 304268780Struckman ++Column; 305115075Shmp } 306268780Struckman // If all rows are terminated early (e.g. by trailing comments), we don't 307115075Shmp // need to look further. 308115075Shmp if (!HasRowWithSufficientColumns) 309115075Shmp break; 310115075Shmp Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. 311115075Shmp 312115075Shmp for (unsigned i = 0; i < Columns; ++i) 313115075Shmp Format.TotalWidth += Format.ColumnSizes[i]; 314115075Shmp 315115075Shmp // Don't use this Format, if the difference between the longest and shortest 316221220Sjhb // element in a column exceeds a threshold to avoid excessive spaces. 317115075Shmp if ([&] { 318115075Shmp for (unsigned i = 0; i < Columns - 1; ++i) 319115075Shmp if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) 320115075Shmp return true; 321115075Shmp return false; 322115075Shmp }()) { 323115075Shmp continue; 324115075Shmp } 325115075Shmp 326115075Shmp // Ignore layouts that are bound to violate the column limit. 327221220Sjhb if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) 328221220Sjhb continue; 329221220Sjhb 330221220Sjhb Formats.push_back(Format); 331221220Sjhb } 332221220Sjhb} 333221220Sjhb 334221220Sjhbconst CommaSeparatedList::ColumnFormat * 335221220SjhbCommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { 336221220Sjhb const ColumnFormat *BestFormat = nullptr; 337221220Sjhb for (const ColumnFormat &Format : llvm::reverse(Formats)) { 338221220Sjhb if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { 339221220Sjhb if (BestFormat && Format.LineCount > BestFormat->LineCount) 340221220Sjhb break; 341221220Sjhb BestFormat = &Format; 342221220Sjhb } 343221220Sjhb } 344221220Sjhb return BestFormat; 345221220Sjhb} 346221220Sjhb 347221220Sjhb} // namespace format 348221220Sjhb} // namespace clang 349221220Sjhb