1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "FormatTokenLexer.h"
18#include "FormatTokenSource.h"
19#include "Macros.h"
20#include "TokenAnnotator.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/raw_os_ostream.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <algorithm>
29#include <utility>
30
31#define DEBUG_TYPE "format-parser"
32
33namespace clang {
34namespace format {
35
36namespace {
37
38void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39               StringRef Prefix = "", bool PrintText = false) {
40  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42  bool NewLine = false;
43  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44                                                    E = Line.Tokens.end();
45       I != E; ++I) {
46    if (NewLine) {
47      OS << Prefix;
48      NewLine = false;
49    }
50    OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52       << "\"] ";
53    for (SmallVectorImpl<UnwrappedLine>::const_iterator
54             CI = I->Children.begin(),
55             CE = I->Children.end();
56         CI != CE; ++CI) {
57      OS << "\n";
58      printLine(OS, *CI, (Prefix + "  ").str());
59      NewLine = true;
60    }
61  }
62  if (!NewLine)
63    OS << "\n";
64}
65
66LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67  printLine(llvm::dbgs(), Line);
68}
69
70class ScopedDeclarationState {
71public:
72  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73                         bool MustBeDeclaration)
74      : Line(Line), Stack(Stack) {
75    Line.MustBeDeclaration = MustBeDeclaration;
76    Stack.push_back(MustBeDeclaration);
77  }
78  ~ScopedDeclarationState() {
79    Stack.pop_back();
80    if (!Stack.empty())
81      Line.MustBeDeclaration = Stack.back();
82    else
83      Line.MustBeDeclaration = true;
84  }
85
86private:
87  UnwrappedLine &Line;
88  llvm::BitVector &Stack;
89};
90
91} // end anonymous namespace
92
93class ScopedLineState {
94public:
95  ScopedLineState(UnwrappedLineParser &Parser,
96                  bool SwitchToPreprocessorLines = false)
97      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98    if (SwitchToPreprocessorLines)
99      Parser.CurrentLines = &Parser.PreprocessorDirectives;
100    else if (!Parser.Line->Tokens.empty())
101      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102    PreBlockLine = std::move(Parser.Line);
103    Parser.Line = std::make_unique<UnwrappedLine>();
104    Parser.Line->Level = PreBlockLine->Level;
105    Parser.Line->PPLevel = PreBlockLine->PPLevel;
106    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
108  }
109
110  ~ScopedLineState() {
111    if (!Parser.Line->Tokens.empty())
112      Parser.addUnwrappedLine();
113    assert(Parser.Line->Tokens.empty());
114    Parser.Line = std::move(PreBlockLine);
115    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116      Parser.MustBreakBeforeNextToken = true;
117    Parser.CurrentLines = OriginalLines;
118  }
119
120private:
121  UnwrappedLineParser &Parser;
122
123  std::unique_ptr<UnwrappedLine> PreBlockLine;
124  SmallVectorImpl<UnwrappedLine> *OriginalLines;
125};
126
127class CompoundStatementIndenter {
128public:
129  CompoundStatementIndenter(UnwrappedLineParser *Parser,
130                            const FormatStyle &Style, unsigned &LineLevel)
131      : CompoundStatementIndenter(Parser, LineLevel,
132                                  Style.BraceWrapping.AfterControlStatement,
133                                  Style.BraceWrapping.IndentBraces) {}
134  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135                            bool WrapBrace, bool IndentBrace)
136      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137    if (WrapBrace)
138      Parser->addUnwrappedLine();
139    if (IndentBrace)
140      ++LineLevel;
141  }
142  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
143
144private:
145  unsigned &LineLevel;
146  unsigned OldLineLevel;
147};
148
/// Constructs a parser over \p Tokens.
///
/// The parser starts with a fresh, empty current line, collects finished
/// lines into its own \c Lines container, and has no active token source
/// until one is installed (the \c Tokens member starts out null; the
/// \p Tokens argument is stored as \c AllTokens).
///
/// \param SourceMgr Forwarded to the macro expander (\c Macros).
/// \param Style Formatting style; also supplies the comment-pragma regex, the
///        macro definitions and the initial include-guard state.
/// \param Keywords Additional (language-specific) keywords.
/// \param FirstStartColumn Stored for later assignment to a line's
///        \c FirstStartColumn.
/// \param Tokens The complete token sequence to parse.
/// \param Callback Consumer that receives the produced unwrapped lines.
/// \param Allocator Forwarded to the macro expander.
/// \param IdentTable Forwarded to the macro expander.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      // Include-guard detection starts out rejected when preprocessor
      // directives are not indented at all.
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
164
165void UnwrappedLineParser::reset() {
166  PPBranchLevel = -1;
167  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168                     ? IG_Rejected
169                     : IG_Inited;
170  IncludeGuardToken = nullptr;
171  Line.reset(new UnwrappedLine);
172  CommentsBeforeNextToken.clear();
173  FormatTok = nullptr;
174  MustBreakBeforeNextToken = false;
175  IsDecltypeAutoFunction = false;
176  PreprocessorDirectives.clear();
177  CurrentLines = &Lines;
178  DeclarationScopeStack.clear();
179  NestedTooDeep.clear();
180  NestedLambdas.clear();
181  PPStack.clear();
182  Line->FirstStartColumn = FirstStartColumn;
183
184  if (!Unexpanded.empty())
185    for (FormatToken *Token : AllTokens)
186      Token->MacroCtx.reset();
187  CurrentExpandedLines.clear();
188  ExpandedLines.clear();
189  Unexpanded.clear();
190  InExpansion = false;
191  Reconstruct.reset();
192}
193
194void UnwrappedLineParser::parse() {
195  IndexedTokenSource TokenSource(AllTokens);
196  Line->FirstStartColumn = FirstStartColumn;
197  do {
198    LLVM_DEBUG(llvm::dbgs() << "----\n");
199    reset();
200    Tokens = &TokenSource;
201    TokenSource.reset();
202
203    readToken();
204    parseFile();
205
206    // If we found an include guard then all preprocessor directives (other than
207    // the guard) are over-indented by one.
208    if (IncludeGuard == IG_Found) {
209      for (auto &Line : Lines)
210        if (Line.InPPDirective && Line.Level > 0)
211          --Line.Level;
212    }
213
214    // Create line with eof token.
215    assert(eof());
216    pushToken(FormatTok);
217    addUnwrappedLine();
218
219    // In a first run, format everything with the lines containing macro calls
220    // replaced by the expansion.
221    if (!ExpandedLines.empty()) {
222      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223      for (const auto &Line : Lines) {
224        if (!Line.Tokens.empty()) {
225          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226          if (it != ExpandedLines.end()) {
227            for (const auto &Expanded : it->second) {
228              LLVM_DEBUG(printDebugInfo(Expanded));
229              Callback.consumeUnwrappedLine(Expanded);
230            }
231            continue;
232          }
233        }
234        LLVM_DEBUG(printDebugInfo(Line));
235        Callback.consumeUnwrappedLine(Line);
236      }
237      Callback.finishRun();
238    }
239
240    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241    for (const UnwrappedLine &Line : Lines) {
242      LLVM_DEBUG(printDebugInfo(Line));
243      Callback.consumeUnwrappedLine(Line);
244    }
245    Callback.finishRun();
246    Lines.clear();
247    while (!PPLevelBranchIndex.empty() &&
248           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251    }
252    if (!PPLevelBranchIndex.empty()) {
253      ++PPLevelBranchIndex.back();
254      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256    }
257  } while (!PPLevelBranchIndex.empty());
258}
259
260void UnwrappedLineParser::parseFile() {
261  // The top-level context in a file always has declarations, except for pre-
262  // processor directives and JavaScript files.
263  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265                                          MustBeDeclaration);
266  if (Style.Language == FormatStyle::LK_TextProto)
267    parseBracedList();
268  else
269    parseLevel();
270  // Make sure to format the remaining tokens.
271  //
272  // LK_TextProto is special since its top-level is parsed as the body of a
273  // braced list, which does not necessarily have natural line separators such
274  // as a semicolon. Comments after the last entry that have been determined to
275  // not belong to that line, as in:
276  //   key: value
277  //   // endfile comment
278  // do not have a chance to be put on a line of their own until this point.
279  // Here we add this newline before end-of-file comments.
280  if (Style.Language == FormatStyle::LK_TextProto &&
281      !CommentsBeforeNextToken.empty()) {
282    addUnwrappedLine();
283  }
284  flushComments(true);
285  addUnwrappedLine();
286}
287
288void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289  do {
290    switch (FormatTok->Tok.getKind()) {
291    case tok::l_brace:
292      return;
293    default:
294      if (FormatTok->is(Keywords.kw_where)) {
295        addUnwrappedLine();
296        nextToken();
297        parseCSharpGenericTypeConstraint();
298        break;
299      }
300      nextToken();
301      break;
302    }
303  } while (!eof());
304}
305
306void UnwrappedLineParser::parseCSharpAttribute() {
307  int UnpairedSquareBrackets = 1;
308  do {
309    switch (FormatTok->Tok.getKind()) {
310    case tok::r_square:
311      nextToken();
312      --UnpairedSquareBrackets;
313      if (UnpairedSquareBrackets == 0) {
314        addUnwrappedLine();
315        return;
316      }
317      break;
318    case tok::l_square:
319      ++UnpairedSquareBrackets;
320      nextToken();
321      break;
322    default:
323      nextToken();
324      break;
325    }
326  } while (!eof());
327}
328
329bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330  if (!Lines.empty() && Lines.back().InPPDirective)
331    return true;
332
333  const FormatToken *Previous = Tokens->getPreviousToken();
334  return Previous && Previous->is(tok::comment) &&
335         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
336}
337
/// \brief Parses a level, that is, the sequence of structural elements that
/// runs until end of file or, when \p OpeningBrace is given, until a closing
/// brace (or macro block end) is reached. In the latter case the closing
/// token is left unconsumed for the caller.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.) true is only returned when
/// Style.RemoveBracesLLVM is set and the level ends at a closing brace.
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; forced to true when brace removal is disabled so
  // that the simple-block check below always fails in that configuration.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute tokens are skipped without counting as a statement.
    if (FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once a do-while or a label
    // has been seen, a null pointer is passed for the corresponding flag so
    // it cannot be reset by later elements.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level gets a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // End of the level. Everything below only decides whether the level
        // qualifies as a "simple block" whose braces may be removed.
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // The isNot(tok::r_brace) test rejects a macro block end, which
        // reaches this case through the kind override above.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment trailing the closing brace on the same line also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // A closing brace without an opening brace for this level is consumed
      // and put on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see whether a ':' follows, then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label encountered in this level may raise the level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() starts after it.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
474
/// Scans forward from the current opening brace and classifies each pair of
/// braces encountered as either a block (BK_Block) or a braced initializer
/// list (BK_BracedInit), based on the tokens surrounding the closing brace.
/// The scan stops at end of file or once the initial brace has been closed;
/// braces that are still open then are assumed to be blocks. The token
/// position is restored before returning.
/// \param ExpectClassBody If true, a ';' directly after the closing brace of
/// the outermost brace pair does not by itself mark that pair as a braced
/// list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    // Fetch the next non-comment token; it serves as lookahead for Tok.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    if (!Line->InMacroBody) {
      // Skip PPDirective lines and comments.
      while (NextTok->is(tok::hash)) {
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide for braces that have not been classified yet.
      if (LBraceStack.back().Tok->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If a comma, period, colon, right paren, right square bracket or
          // ellipsis follows the closing brace, we assume this is a braced
          // initializer list. (A following semicolon is handled separately
          // below.)
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          // An identifier after the closing brace suggests a braced list
          // unless the token before the closing brace ends a statement or is
          // itself a brace.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' after the closing brace counts, except for the outermost
          // brace pair when a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          // Note: this overrides (rather than ORs into) the result computed
          // so far.
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (Style.isCpp() && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        // Record the decision on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back().Tok->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers that are statement macros mark the enclosing brace
      // as a block.
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unclassified brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  // Rewind the token source to where the scan started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
651
652// Sets the token type of the directly previous right brace.
653void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
654  if (auto Prev = FormatTok->getPreviousNonComment();
655      Prev && Prev->is(tok::r_brace)) {
656    Prev->setFinalizedType(Type);
657  }
658}
659
/// Folds the std::hash of \p v into the running hash \p seed. The current
/// seed is mixed in through two shifted copies, so the result depends on the
/// order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
665
666size_t UnwrappedLineParser::computePPHash() const {
667  size_t h = 0;
668  for (const auto &i : PPStack) {
669    hash_combine(h, size_t(i.Kind));
670    hash_combine(h, i.Line);
671  }
672  return h;
673}
674
// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
//
// Returns true unconditionally when the style has no column limit
// (ColumnLimit == 0).
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  // Snapshot every token (and its children) so the annotator's modifications
  // can be undone below.
  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    // Heap-allocated copy; released in the restore loop below.
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    // NOTE(review): Token is a const reference here, so this std::move is
    // expected to copy the children rather than steal them - confirm against
    // the assignment operators of the Children container.
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  // TotalLength of the last token is used as the length of the whole line.
  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // If the brace's TotalLength is exactly ColumnLimit more than that of the
    // token before it, take that ColumnLimit out again.
    // NOTE(review): presumably the annotator adds ColumnLimit for a forced
    // break before the brace - confirm in TokenAnnotator.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    // Discount the brace itself plus the space in front of it.
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading closing brace (plus one more column) is discounted as well.
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the tokens from the snapshot and free the temporary copies.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these ever change, PPLevel needs to be used to get the correct
  // indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}
737
/// Parses a block that starts at the current token: an opening brace, a macro
/// block begin token, or (for Verilog) a begin keyword or hierarchy header.
/// Consumes the opening token, parses the nested level, and, if the matching
/// closing token is found, consumes that as well together with a few
/// constructs that may directly follow it.
///
/// \param MustBeDeclaration Declaration state installed for the block body.
/// \param AddLevels Number of indentation levels added for the body.
/// \param MunchSemi If true, a ';' following the block is consumed too.
/// \param KeepBraces If true, the braces are only considered removable when
///        a nested \c if has braces that are required (not optional).
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If true, the level is decremented once
///        after the opening line has been added.
/// \returns The \c if left brace reported by parseLevel() (possibly null), or
///          \c nullptr if nesting is too deep and parsing bailed out.
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  // Consumes an optional Verilog block label of the form `: name`.
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  // Remember the opening token and the state around it; the brace-removal
  // check below needs them after the body has been parsed.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  // Hash of the preprocessor stack at the opening brace; compared with the
  // hash at the closing brace before the two lines are linked.
  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the line holding the opening brace within CurrentLines. Outside
  // of a preprocessor directive, the number of collected preprocessor
  // directive lines is subtracted from that index.
  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level ended on something other than the expected closing token:
  // restore the level and give up on this block.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
    FormatTok->setFinalizedType(TT_NamespaceRBrace);

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block are removable. Captures by copy
  // and is mutable, so the changes it makes to Index and Tok stay local to
  // the lambda.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    // The opening brace has no previous token, i.e. it starts its own line.
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    // The last line of the body must itself have a chance to fit.
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are marked by linking them through MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels wraps before it is
  // converted to the parameter type - presumably a signed int; confirm.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link the opening and closing lines if the preprocessor stack hashes
  // the same at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}
911
912static bool isGoogScope(const UnwrappedLine &Line) {
913  // FIXME: Closure-library specific stuff should not be hard-coded but be
914  // configurable.
915  if (Line.Tokens.size() < 4)
916    return false;
917  auto I = Line.Tokens.begin();
918  if (I->Tok->TokenText != "goog")
919    return false;
920  ++I;
921  if (I->Tok->isNot(tok::period))
922    return false;
923  ++I;
924  if (I->Tok->TokenText != "scope")
925    return false;
926  ++I;
927  return I->Tok->is(tok::l_paren);
928}
929
930static bool isIIFE(const UnwrappedLine &Line,
931                   const AdditionalKeywords &Keywords) {
932  // Look for the start of an immediately invoked anonymous function.
933  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
934  // This is commonly done in JavaScript to create a new, anonymous scope.
935  // Example: (function() { ... })()
936  if (Line.Tokens.size() < 3)
937    return false;
938  auto I = Line.Tokens.begin();
939  if (I->Tok->isNot(tok::l_paren))
940    return false;
941  ++I;
942  if (I->Tok->isNot(Keywords.kw_function))
943    return false;
944  ++I;
945  return I->Tok->is(tok::l_paren);
946}
947
948static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
949                                   const FormatToken &InitialToken) {
950  tok::TokenKind Kind = InitialToken.Tok.getKind();
951  if (InitialToken.is(TT_NamespaceMacro))
952    Kind = tok::kw_namespace;
953
954  switch (Kind) {
955  case tok::kw_namespace:
956    return Style.BraceWrapping.AfterNamespace;
957  case tok::kw_class:
958    return Style.BraceWrapping.AfterClass;
959  case tok::kw_union:
960    return Style.BraceWrapping.AfterUnion;
961  case tok::kw_struct:
962    return Style.BraceWrapping.AfterStruct;
963  case tok::kw_enum:
964    return Style.BraceWrapping.AfterEnum;
965  default:
966    return false;
967  }
968}
969
970void UnwrappedLineParser::parseChildBlock() {
971  assert(FormatTok->is(tok::l_brace));
972  FormatTok->setBlockKind(BK_Block);
973  const FormatToken *OpeningBrace = FormatTok;
974  nextToken();
975  {
976    bool SkipIndent = (Style.isJavaScript() &&
977                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
978    ScopedLineState LineState(*this);
979    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
980                                            /*MustBeDeclaration=*/false);
981    Line->Level += SkipIndent ? 0 : 1;
982    parseLevel(OpeningBrace);
983    flushComments(isOnNewLine(*FormatTok));
984    Line->Level -= SkipIndent ? 0 : 1;
985  }
986  nextToken();
987}
988
989void UnwrappedLineParser::parsePPDirective() {
990  assert(FormatTok->is(tok::hash) && "'#' expected");
991  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
992
993  nextToken();
994
995  if (!FormatTok->Tok.getIdentifierInfo()) {
996    parsePPUnknown();
997    return;
998  }
999
1000  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1001  case tok::pp_define:
1002    parsePPDefine();
1003    return;
1004  case tok::pp_if:
1005    parsePPIf(/*IfDef=*/false);
1006    break;
1007  case tok::pp_ifdef:
1008  case tok::pp_ifndef:
1009    parsePPIf(/*IfDef=*/true);
1010    break;
1011  case tok::pp_else:
1012  case tok::pp_elifdef:
1013  case tok::pp_elifndef:
1014  case tok::pp_elif:
1015    parsePPElse();
1016    break;
1017  case tok::pp_endif:
1018    parsePPEndIf();
1019    break;
1020  case tok::pp_pragma:
1021    parsePPPragma();
1022    break;
1023  default:
1024    parsePPUnknown();
1025    break;
1026  }
1027}
1028
1029void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1030  size_t Line = CurrentLines->size();
1031  if (CurrentLines == &PreprocessorDirectives)
1032    Line += Lines.size();
1033
1034  if (Unreachable ||
1035      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1036    PPStack.push_back({PP_Unreachable, Line});
1037  } else {
1038    PPStack.push_back({PP_Conditional, Line});
1039  }
1040}
1041
1042void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1043  ++PPBranchLevel;
1044  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1045  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1046    PPLevelBranchIndex.push_back(0);
1047    PPLevelBranchCount.push_back(0);
1048  }
1049  PPChainBranchIndex.push(Unreachable ? -1 : 0);
1050  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1051  conditionalCompilationCondition(Unreachable || Skip);
1052}
1053
1054void UnwrappedLineParser::conditionalCompilationAlternative() {
1055  if (!PPStack.empty())
1056    PPStack.pop_back();
1057  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1058  if (!PPChainBranchIndex.empty())
1059    ++PPChainBranchIndex.top();
1060  conditionalCompilationCondition(
1061      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1062      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1063}
1064
1065void UnwrappedLineParser::conditionalCompilationEnd() {
1066  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1067  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1068    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1069      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1070  }
1071  // Guard against #endif's without #if.
1072  if (PPBranchLevel > -1)
1073    --PPBranchLevel;
1074  if (!PPChainBranchIndex.empty())
1075    PPChainBranchIndex.pop();
1076  if (!PPStack.empty())
1077    PPStack.pop_back();
1078}
1079
1080void UnwrappedLineParser::parsePPIf(bool IfDef) {
1081  bool IfNDef = FormatTok->is(tok::pp_ifndef);
1082  nextToken();
1083  bool Unreachable = false;
1084  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1085    Unreachable = true;
1086  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1087    Unreachable = true;
1088  conditionalCompilationStart(Unreachable);
1089  FormatToken *IfCondition = FormatTok;
1090  // If there's a #ifndef on the first line, and the only lines before it are
1091  // comments, it could be an include guard.
1092  bool MaybeIncludeGuard = IfNDef;
1093  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1094    for (auto &Line : Lines) {
1095      if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1096        MaybeIncludeGuard = false;
1097        IncludeGuard = IG_Rejected;
1098        break;
1099      }
1100    }
1101  }
1102  --PPBranchLevel;
1103  parsePPUnknown();
1104  ++PPBranchLevel;
1105  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106    IncludeGuard = IG_IfNdefed;
1107    IncludeGuardToken = IfCondition;
1108  }
1109}
1110
1111void UnwrappedLineParser::parsePPElse() {
1112  // If a potential include guard has an #else, it's not an include guard.
1113  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1114    IncludeGuard = IG_Rejected;
1115  // Don't crash when there is an #else without an #if.
1116  assert(PPBranchLevel >= -1);
1117  if (PPBranchLevel == -1)
1118    conditionalCompilationStart(/*Unreachable=*/true);
1119  conditionalCompilationAlternative();
1120  --PPBranchLevel;
1121  parsePPUnknown();
1122  ++PPBranchLevel;
1123}
1124
1125void UnwrappedLineParser::parsePPEndIf() {
1126  conditionalCompilationEnd();
1127  parsePPUnknown();
1128  // If the #endif of a potential include guard is the last thing in the file,
1129  // then we found an include guard.
1130  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1131      Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1132    IncludeGuard = IG_Found;
1133  }
1134}
1135
1136void UnwrappedLineParser::parsePPDefine() {
1137  nextToken();
1138
1139  if (!FormatTok->Tok.getIdentifierInfo()) {
1140    IncludeGuard = IG_Rejected;
1141    IncludeGuardToken = nullptr;
1142    parsePPUnknown();
1143    return;
1144  }
1145
1146  if (IncludeGuard == IG_IfNdefed &&
1147      IncludeGuardToken->TokenText == FormatTok->TokenText) {
1148    IncludeGuard = IG_Defined;
1149    IncludeGuardToken = nullptr;
1150    for (auto &Line : Lines) {
1151      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1152        IncludeGuard = IG_Rejected;
1153        break;
1154      }
1155    }
1156  }
1157
1158  // In the context of a define, even keywords should be treated as normal
1159  // identifiers. Setting the kind to identifier is not enough, because we need
1160  // to treat additional keywords like __except as well, which are already
1161  // identifiers. Setting the identifier info to null interferes with include
1162  // guard processing above, and changes preprocessing nesting.
1163  FormatTok->Tok.setKind(tok::identifier);
1164  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1165  nextToken();
1166  if (FormatTok->Tok.getKind() == tok::l_paren &&
1167      !FormatTok->hasWhitespaceBefore()) {
1168    parseParens();
1169  }
1170  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1171    Line->Level += PPBranchLevel + 1;
1172  addUnwrappedLine();
1173  ++Line->Level;
1174
1175  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1176  assert((int)Line->PPLevel >= 0);
1177  Line->InMacroBody = true;
1178
1179  if (Style.SkipMacroDefinitionBody) {
1180    do {
1181      FormatTok->Finalized = true;
1182      nextToken();
1183    } while (!eof());
1184    addUnwrappedLine();
1185    return;
1186  }
1187
1188  if (FormatTok->is(tok::identifier) &&
1189      Tokens->peekNextToken()->is(tok::colon)) {
1190    nextToken();
1191    nextToken();
1192  }
1193
1194  // Errors during a preprocessor directive can only affect the layout of the
1195  // preprocessor directive, and thus we ignore them. An alternative approach
1196  // would be to use the same approach we use on the file level (no
1197  // re-indentation if there was a structural error) within the macro
1198  // definition.
1199  parseFile();
1200}
1201
1202void UnwrappedLineParser::parsePPPragma() {
1203  Line->InPragmaDirective = true;
1204  parsePPUnknown();
1205}
1206
1207void UnwrappedLineParser::parsePPUnknown() {
1208  do {
1209    nextToken();
1210  } while (!eof());
1211  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1212    Line->Level += PPBranchLevel + 1;
1213  addUnwrappedLine();
1214}
1215
1216// Here we exclude certain tokens that are not usually the first token in an
1217// unwrapped line. This is used in attempt to distinguish macro calls without
1218// trailing semicolons from other constructs split to several lines.
1219static bool tokenCanStartNewLine(const FormatToken &Tok) {
1220  // Semicolon can be a null-statement, l_square can be a start of a macro or
1221  // a C++11 attribute, but this doesn't seem to be common.
1222  assert(Tok.isNot(TT_AttributeSquare));
1223  return !Tok.isOneOf(tok::semi, tok::l_brace,
1224                      // Tokens that can only be used as binary operators and a
1225                      // part of overloaded operator names.
1226                      tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1227                      tok::less, tok::greater, tok::slash, tok::percent,
1228                      tok::lessless, tok::greatergreater, tok::equal,
1229                      tok::plusequal, tok::minusequal, tok::starequal,
1230                      tok::slashequal, tok::percentequal, tok::ampequal,
1231                      tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1232                      tok::lesslessequal,
1233                      // Colon is used in labels, base class lists, initializer
1234                      // lists, range-based for loops, ternary operator, but
1235                      // should never be the first token in an unwrapped line.
1236                      tok::colon,
1237                      // 'noexcept' is a trailing annotation.
1238                      tok::kw_noexcept);
1239}
1240
1241static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1242                          const FormatToken *FormatTok) {
1243  // FIXME: This returns true for C/C++ keywords like 'struct'.
1244  return FormatTok->is(tok::identifier) &&
1245         (!FormatTok->Tok.getIdentifierInfo() ||
1246          !FormatTok->isOneOf(
1247              Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1248              Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1249              Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1250              Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1251              Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1252              Keywords.kw_instanceof, Keywords.kw_interface,
1253              Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1254}
1255
1256static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1257                                 const FormatToken *FormatTok) {
1258  return FormatTok->Tok.isLiteral() ||
1259         FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1260         mustBeJSIdent(Keywords, FormatTok);
1261}
1262
1263// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1264// when encountered after a value (see mustBeJSIdentOrValue).
1265static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1266                           const FormatToken *FormatTok) {
1267  return FormatTok->isOneOf(
1268      tok::kw_return, Keywords.kw_yield,
1269      // conditionals
1270      tok::kw_if, tok::kw_else,
1271      // loops
1272      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1273      // switch/case
1274      tok::kw_switch, tok::kw_case,
1275      // exceptions
1276      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1277      // declaration
1278      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1279      Keywords.kw_async, Keywords.kw_function,
1280      // import/export
1281      Keywords.kw_import, tok::kw_export);
1282}
1283
1284// Checks whether a token is a type in K&R C (aka C78).
1285static bool isC78Type(const FormatToken &Tok) {
1286  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1287                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
1288                     tok::identifier);
1289}
1290
1291// This function checks whether a token starts the first parameter declaration
1292// in a K&R C (aka C78) function definition, e.g.:
1293//   int f(a, b)
1294//   short a, b;
1295//   {
1296//      return a + b;
1297//   }
1298static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1299                               const FormatToken *FuncName) {
1300  assert(Tok);
1301  assert(Next);
1302  assert(FuncName);
1303
1304  if (FuncName->isNot(tok::identifier))
1305    return false;
1306
1307  const FormatToken *Prev = FuncName->Previous;
1308  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1309    return false;
1310
1311  if (!isC78Type(*Tok) &&
1312      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1313    return false;
1314  }
1315
1316  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1317    return false;
1318
1319  Tok = Tok->Previous;
1320  if (!Tok || Tok->isNot(tok::r_paren))
1321    return false;
1322
1323  Tok = Tok->Previous;
1324  if (!Tok || Tok->isNot(tok::identifier))
1325    return false;
1326
1327  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1328}
1329
1330bool UnwrappedLineParser::parseModuleImport() {
1331  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1332
1333  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1334      !Token->Tok.getIdentifierInfo() &&
1335      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1336    return false;
1337  }
1338
1339  nextToken();
1340  while (!eof()) {
1341    if (FormatTok->is(tok::colon)) {
1342      FormatTok->setFinalizedType(TT_ModulePartitionColon);
1343    }
1344    // Handle import <foo/bar.h> as we would an include statement.
1345    else if (FormatTok->is(tok::less)) {
1346      nextToken();
1347      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1348        // Mark tokens up to the trailing line comments as implicit string
1349        // literals.
1350        if (FormatTok->isNot(tok::comment) &&
1351            !FormatTok->TokenText.starts_with("//")) {
1352          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1353        }
1354        nextToken();
1355      }
1356    }
1357    if (FormatTok->is(tok::semi)) {
1358      nextToken();
1359      break;
1360    }
1361    nextToken();
1362  }
1363
1364  addUnwrappedLine();
1365  return true;
1366}
1367
1368// readTokenWithJavaScriptASI reads the next token and terminates the current
1369// line if JavaScript Automatic Semicolon Insertion must
1370// happen between the current token and the next token.
1371//
1372// This method is conservative - it cannot cover all edge cases of JavaScript,
1373// but only aims to correctly handle certain well known cases. It *must not*
1374// return true in speculative cases.
1375void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1376  FormatToken *Previous = FormatTok;
1377  readToken();
1378  FormatToken *Next = FormatTok;
1379
1380  bool IsOnSameLine =
1381      CommentsBeforeNextToken.empty()
1382          ? Next->NewlinesBefore == 0
1383          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1384  if (IsOnSameLine)
1385    return;
1386
1387  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1388  bool PreviousStartsTemplateExpr =
1389      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1390  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1391    // If the line contains an '@' sign, the previous token might be an
1392    // annotation, which can precede another identifier/value.
1393    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1394      return LineNode.Tok->is(tok::at);
1395    });
1396    if (HasAt)
1397      return;
1398  }
1399  if (Next->is(tok::exclaim) && PreviousMustBeValue)
1400    return addUnwrappedLine();
1401  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1402  bool NextEndsTemplateExpr =
1403      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1404  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1405      (PreviousMustBeValue ||
1406       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1407                         tok::minusminus))) {
1408    return addUnwrappedLine();
1409  }
1410  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1411      isJSDeclOrStmt(Keywords, Next)) {
1412    return addUnwrappedLine();
1413  }
1414}
1415
1416void UnwrappedLineParser::parseStructuralElement(
1417    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1418    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1419  if (Style.Language == FormatStyle::LK_TableGen &&
1420      FormatTok->is(tok::pp_include)) {
1421    nextToken();
1422    if (FormatTok->is(tok::string_literal))
1423      nextToken();
1424    addUnwrappedLine();
1425    return;
1426  }
1427
1428  if (Style.isCpp()) {
1429    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1430    }
1431  } else if (Style.isVerilog()) {
1432    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1433      parseForOrWhileLoop(/*HasParens=*/false);
1434      return;
1435    }
1436    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1437      parseForOrWhileLoop();
1438      return;
1439    }
1440    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1441                           Keywords.kw_assume, Keywords.kw_cover)) {
1442      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1443      return;
1444    }
1445
1446    // Skip things that can exist before keywords like 'if' and 'case'.
1447    while (true) {
1448      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1449                             Keywords.kw_unique0)) {
1450        nextToken();
1451      } else if (FormatTok->is(tok::l_paren) &&
1452                 Tokens->peekNextToken()->is(tok::star)) {
1453        parseParens();
1454      } else {
1455        break;
1456      }
1457    }
1458  }
1459
1460  // Tokens that only make sense at the beginning of a line.
1461  switch (FormatTok->Tok.getKind()) {
1462  case tok::kw_asm:
1463    nextToken();
1464    if (FormatTok->is(tok::l_brace)) {
1465      FormatTok->setFinalizedType(TT_InlineASMBrace);
1466      nextToken();
1467      while (FormatTok && !eof()) {
1468        if (FormatTok->is(tok::r_brace)) {
1469          FormatTok->setFinalizedType(TT_InlineASMBrace);
1470          nextToken();
1471          addUnwrappedLine();
1472          break;
1473        }
1474        FormatTok->Finalized = true;
1475        nextToken();
1476      }
1477    }
1478    break;
1479  case tok::kw_namespace:
1480    parseNamespace();
1481    return;
1482  case tok::kw_public:
1483  case tok::kw_protected:
1484  case tok::kw_private:
1485    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1486        Style.isCSharp()) {
1487      nextToken();
1488    } else {
1489      parseAccessSpecifier();
1490    }
1491    return;
1492  case tok::kw_if: {
1493    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1494      // field/method declaration.
1495      break;
1496    }
1497    FormatToken *Tok = parseIfThenElse(IfKind);
1498    if (IfLeftBrace)
1499      *IfLeftBrace = Tok;
1500    return;
1501  }
1502  case tok::kw_for:
1503  case tok::kw_while:
1504    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1505      // field/method declaration.
1506      break;
1507    }
1508    parseForOrWhileLoop();
1509    return;
1510  case tok::kw_do:
1511    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512      // field/method declaration.
1513      break;
1514    }
1515    parseDoWhile();
1516    if (HasDoWhile)
1517      *HasDoWhile = true;
1518    return;
1519  case tok::kw_switch:
1520    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1521      // 'switch: string' field declaration.
1522      break;
1523    }
1524    parseSwitch();
1525    return;
1526  case tok::kw_default:
1527    // In Verilog default along with other labels are handled in the next loop.
1528    if (Style.isVerilog())
1529      break;
1530    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1531      // 'default: string' field declaration.
1532      break;
1533    }
1534    nextToken();
1535    if (FormatTok->is(tok::colon)) {
1536      FormatTok->setFinalizedType(TT_CaseLabelColon);
1537      parseLabel();
1538      return;
1539    }
1540    // e.g. "default void f() {}" in a Java interface.
1541    break;
1542  case tok::kw_case:
1543    // Proto: there are no switch/case statements.
1544    if (Style.Language == FormatStyle::LK_Proto) {
1545      nextToken();
1546      return;
1547    }
1548    if (Style.isVerilog()) {
1549      parseBlock();
1550      addUnwrappedLine();
1551      return;
1552    }
1553    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1554      // 'case: string' field declaration.
1555      nextToken();
1556      break;
1557    }
1558    parseCaseLabel();
1559    return;
1560  case tok::kw_try:
1561  case tok::kw___try:
1562    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1563      // field/method declaration.
1564      break;
1565    }
1566    parseTryCatch();
1567    return;
1568  case tok::kw_extern:
1569    nextToken();
1570    if (Style.isVerilog()) {
      // In Verilog, an extern module declaration looks like the start of a
      // module. But there is no body and endmodule, so we handle it separately.
1573      if (Keywords.isVerilogHierarchy(*FormatTok)) {
1574        parseVerilogHierarchyHeader();
1575        return;
1576      }
1577    } else if (FormatTok->is(tok::string_literal)) {
1578      nextToken();
1579      if (FormatTok->is(tok::l_brace)) {
1580        if (Style.BraceWrapping.AfterExternBlock)
1581          addUnwrappedLine();
1582        // Either we indent or for backwards compatibility we follow the
1583        // AfterExternBlock style.
1584        unsigned AddLevels =
1585            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1586                    (Style.BraceWrapping.AfterExternBlock &&
1587                     Style.IndentExternBlock ==
1588                         FormatStyle::IEBS_AfterExternBlock)
1589                ? 1u
1590                : 0u;
1591        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1592        addUnwrappedLine();
1593        return;
1594      }
1595    }
1596    break;
1597  case tok::kw_export:
1598    if (Style.isJavaScript()) {
1599      parseJavaScriptEs6ImportExport();
1600      return;
1601    }
1602    if (Style.isCpp()) {
1603      nextToken();
1604      if (FormatTok->is(tok::kw_namespace)) {
1605        parseNamespace();
1606        return;
1607      }
1608      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1609        return;
1610    }
1611    break;
1612  case tok::kw_inline:
1613    nextToken();
1614    if (FormatTok->is(tok::kw_namespace)) {
1615      parseNamespace();
1616      return;
1617    }
1618    break;
1619  case tok::identifier:
1620    if (FormatTok->is(TT_ForEachMacro)) {
1621      parseForOrWhileLoop();
1622      return;
1623    }
1624    if (FormatTok->is(TT_MacroBlockBegin)) {
1625      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1626                 /*MunchSemi=*/false);
1627      return;
1628    }
1629    if (FormatTok->is(Keywords.kw_import)) {
1630      if (Style.isJavaScript()) {
1631        parseJavaScriptEs6ImportExport();
1632        return;
1633      }
1634      if (Style.Language == FormatStyle::LK_Proto) {
1635        nextToken();
1636        if (FormatTok->is(tok::kw_public))
1637          nextToken();
1638        if (FormatTok->isNot(tok::string_literal))
1639          return;
1640        nextToken();
1641        if (FormatTok->is(tok::semi))
1642          nextToken();
1643        addUnwrappedLine();
1644        return;
1645      }
1646      if (Style.isCpp() && parseModuleImport())
1647        return;
1648    }
1649    if (Style.isCpp() &&
1650        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1651                           Keywords.kw_slots, Keywords.kw_qslots)) {
1652      nextToken();
1653      if (FormatTok->is(tok::colon)) {
1654        nextToken();
1655        addUnwrappedLine();
1656        return;
1657      }
1658    }
1659    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1660      parseStatementMacro();
1661      return;
1662    }
1663    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1664      parseNamespace();
1665      return;
1666    }
1667    // In Verilog labels can be any expression, so we don't do them here.
1668    // JS doesn't have macros, and within classes colons indicate fields, not
1669    // labels.
1670    // TableGen doesn't have labels.
1671    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1672        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1673      nextToken();
1674      Line->Tokens.begin()->Tok->MustBreakBefore = true;
1675      FormatTok->setFinalizedType(TT_GotoLabelColon);
1676      parseLabel(!Style.IndentGotoLabels);
1677      if (HasLabel)
1678        *HasLabel = true;
1679      return;
1680    }
1681    // In all other cases, parse the declaration.
1682    break;
1683  default:
1684    break;
1685  }
1686
1687  const bool InRequiresExpression =
1688      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1689  do {
1690    const FormatToken *Previous = FormatTok->Previous;
1691    switch (FormatTok->Tok.getKind()) {
1692    case tok::at:
1693      nextToken();
1694      if (FormatTok->is(tok::l_brace)) {
1695        nextToken();
1696        parseBracedList();
1697        break;
1698      } else if (Style.Language == FormatStyle::LK_Java &&
1699                 FormatTok->is(Keywords.kw_interface)) {
1700        nextToken();
1701        break;
1702      }
1703      switch (FormatTok->Tok.getObjCKeywordID()) {
1704      case tok::objc_public:
1705      case tok::objc_protected:
1706      case tok::objc_package:
1707      case tok::objc_private:
1708        return parseAccessSpecifier();
1709      case tok::objc_interface:
1710      case tok::objc_implementation:
1711        return parseObjCInterfaceOrImplementation();
1712      case tok::objc_protocol:
1713        if (parseObjCProtocol())
1714          return;
1715        break;
1716      case tok::objc_end:
1717        return; // Handled by the caller.
1718      case tok::objc_optional:
1719      case tok::objc_required:
1720        nextToken();
1721        addUnwrappedLine();
1722        return;
1723      case tok::objc_autoreleasepool:
1724        nextToken();
1725        if (FormatTok->is(tok::l_brace)) {
1726          if (Style.BraceWrapping.AfterControlStatement ==
1727              FormatStyle::BWACS_Always) {
1728            addUnwrappedLine();
1729          }
1730          parseBlock();
1731        }
1732        addUnwrappedLine();
1733        return;
1734      case tok::objc_synchronized:
1735        nextToken();
1736        if (FormatTok->is(tok::l_paren)) {
1737          // Skip synchronization object
1738          parseParens();
1739        }
1740        if (FormatTok->is(tok::l_brace)) {
1741          if (Style.BraceWrapping.AfterControlStatement ==
1742              FormatStyle::BWACS_Always) {
1743            addUnwrappedLine();
1744          }
1745          parseBlock();
1746        }
1747        addUnwrappedLine();
1748        return;
1749      case tok::objc_try:
1750        // This branch isn't strictly necessary (the kw_try case below would
1751        // do this too after the tok::at is parsed above).  But be explicit.
1752        parseTryCatch();
1753        return;
1754      default:
1755        break;
1756      }
1757      break;
1758    case tok::kw_requires: {
1759      if (Style.isCpp()) {
1760        bool ParsedClause = parseRequires();
1761        if (ParsedClause)
1762          return;
1763      } else {
1764        nextToken();
1765      }
1766      break;
1767    }
1768    case tok::kw_enum:
1769      // Ignore if this is part of "template <enum ...".
1770      if (Previous && Previous->is(tok::less)) {
1771        nextToken();
1772        break;
1773      }
1774
1775      // parseEnum falls through and does not yet add an unwrapped line as an
1776      // enum definition can start a structural element.
1777      if (!parseEnum())
1778        break;
1779      // This only applies to C++ and Verilog.
1780      if (!Style.isCpp() && !Style.isVerilog()) {
1781        addUnwrappedLine();
1782        return;
1783      }
1784      break;
1785    case tok::kw_typedef:
1786      nextToken();
1787      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1788                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1789                             Keywords.kw_CF_CLOSED_ENUM,
1790                             Keywords.kw_NS_CLOSED_ENUM)) {
1791        parseEnum();
1792      }
1793      break;
1794    case tok::kw_class:
1795      if (Style.isVerilog()) {
1796        parseBlock();
1797        addUnwrappedLine();
1798        return;
1799      }
1800      if (Style.isTableGen()) {
1801        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1802        // This is same as def and so on.
1803        nextToken();
1804        break;
1805      }
1806      [[fallthrough]];
1807    case tok::kw_struct:
1808    case tok::kw_union:
1809      if (parseStructLike())
1810        return;
1811      break;
1812    case tok::kw_decltype:
1813      nextToken();
1814      if (FormatTok->is(tok::l_paren)) {
1815        parseParens();
1816        assert(FormatTok->Previous);
1817        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1818                                              tok::l_paren)) {
1819          Line->SeenDecltypeAuto = true;
1820        }
1821      }
1822      break;
1823    case tok::period:
1824      nextToken();
1825      // In Java, classes have an implicit static member "class".
1826      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1827          FormatTok->is(tok::kw_class)) {
1828        nextToken();
1829      }
1830      if (Style.isJavaScript() && FormatTok &&
1831          FormatTok->Tok.getIdentifierInfo()) {
1832        // JavaScript only has pseudo keywords, all keywords are allowed to
1833        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1834        nextToken();
1835      }
1836      break;
1837    case tok::semi:
1838      nextToken();
1839      addUnwrappedLine();
1840      return;
1841    case tok::r_brace:
1842      addUnwrappedLine();
1843      return;
1844    case tok::l_paren: {
1845      parseParens();
1846      // Break the unwrapped line if a K&R C function definition has a parameter
1847      // declaration.
1848      if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1849        break;
1850      if (isC78ParameterDecl(FormatTok,
1851                             Tokens->peekNextToken(/*SkipComment=*/true),
1852                             Previous)) {
1853        addUnwrappedLine();
1854        return;
1855      }
1856      break;
1857    }
1858    case tok::kw_operator:
1859      nextToken();
1860      if (FormatTok->isBinaryOperator())
1861        nextToken();
1862      break;
1863    case tok::caret:
1864      nextToken();
1865      // Block return type.
1866      if (FormatTok->Tok.isAnyIdentifier() ||
1867          FormatTok->isSimpleTypeSpecifier()) {
1868        nextToken();
1869        // Return types: pointers are ok too.
1870        while (FormatTok->is(tok::star))
1871          nextToken();
1872      }
1873      // Block argument list.
1874      if (FormatTok->is(tok::l_paren))
1875        parseParens();
1876      // Block body.
1877      if (FormatTok->is(tok::l_brace))
1878        parseChildBlock();
1879      break;
1880    case tok::l_brace:
1881      if (InRequiresExpression)
1882        FormatTok->setFinalizedType(TT_BracedListLBrace);
1883      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1884        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1885        // A block outside of parentheses must be the last part of a
1886        // structural element.
1887        // FIXME: Figure out cases where this is not true, and add projections
1888        // for them (the one we know is missing are lambdas).
1889        if (Style.Language == FormatStyle::LK_Java &&
1890            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1891          // If necessary, we could set the type to something different than
1892          // TT_FunctionLBrace.
1893          if (Style.BraceWrapping.AfterControlStatement ==
1894              FormatStyle::BWACS_Always) {
1895            addUnwrappedLine();
1896          }
1897        } else if (Style.BraceWrapping.AfterFunction) {
1898          addUnwrappedLine();
1899        }
1900        FormatTok->setFinalizedType(TT_FunctionLBrace);
1901        parseBlock();
1902        IsDecltypeAutoFunction = false;
1903        addUnwrappedLine();
1904        return;
1905      }
1906      // Otherwise this was a braced init list, and the structural
1907      // element continues.
1908      break;
1909    case tok::kw_try:
1910      if (Style.isJavaScript() && Line->MustBeDeclaration) {
1911        // field/method declaration.
1912        nextToken();
1913        break;
1914      }
1915      // We arrive here when parsing function-try blocks.
1916      if (Style.BraceWrapping.AfterFunction)
1917        addUnwrappedLine();
1918      parseTryCatch();
1919      return;
1920    case tok::identifier: {
1921      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1922          Line->MustBeDeclaration) {
1923        addUnwrappedLine();
1924        parseCSharpGenericTypeConstraint();
1925        break;
1926      }
1927      if (FormatTok->is(TT_MacroBlockEnd)) {
1928        addUnwrappedLine();
1929        return;
1930      }
1931
1932      // Function declarations (as opposed to function expressions) are parsed
1933      // on their own unwrapped line by continuing this loop. Function
1934      // expressions (functions that are not on their own line) must not create
1935      // a new unwrapped line, so they are special cased below.
1936      size_t TokenCount = Line->Tokens.size();
1937      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1938          (TokenCount > 1 ||
1939           (TokenCount == 1 &&
1940            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1941        tryToParseJSFunction();
1942        break;
1943      }
1944      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1945          FormatTok->is(Keywords.kw_interface)) {
1946        if (Style.isJavaScript()) {
1947          // In JavaScript/TypeScript, "interface" can be used as a standalone
1948          // identifier, e.g. in `var interface = 1;`. If "interface" is
1949          // followed by another identifier, it is very like to be an actual
1950          // interface declaration.
1951          unsigned StoredPosition = Tokens->getPosition();
1952          FormatToken *Next = Tokens->getNextToken();
1953          FormatTok = Tokens->setPosition(StoredPosition);
1954          if (!mustBeJSIdent(Keywords, Next)) {
1955            nextToken();
1956            break;
1957          }
1958        }
1959        parseRecord();
1960        addUnwrappedLine();
1961        return;
1962      }
1963
1964      if (Style.isVerilog()) {
1965        if (FormatTok->is(Keywords.kw_table)) {
1966          parseVerilogTable();
1967          return;
1968        }
1969        if (Keywords.isVerilogBegin(*FormatTok) ||
1970            Keywords.isVerilogHierarchy(*FormatTok)) {
1971          parseBlock();
1972          addUnwrappedLine();
1973          return;
1974        }
1975      }
1976
1977      if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1978        if (parseStructLike())
1979          return;
1980        break;
1981      }
1982
1983      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1984        parseStatementMacro();
1985        return;
1986      }
1987
1988      // See if the following token should start a new unwrapped line.
1989      StringRef Text = FormatTok->TokenText;
1990
1991      FormatToken *PreviousToken = FormatTok;
1992      nextToken();
1993
1994      // JS doesn't have macros, and within classes colons indicate fields, not
1995      // labels.
1996      if (Style.isJavaScript())
1997        break;
1998
1999      auto OneTokenSoFar = [&]() {
2000        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2001        while (I != E && I->Tok->is(tok::comment))
2002          ++I;
2003        if (Style.isVerilog())
2004          while (I != E && I->Tok->is(tok::hash))
2005            ++I;
2006        return I != E && (++I == E);
2007      };
2008      if (OneTokenSoFar()) {
2009        // Recognize function-like macro usages without trailing semicolon as
2010        // well as free-standing macros like Q_OBJECT.
2011        bool FunctionLike = FormatTok->is(tok::l_paren);
2012        if (FunctionLike)
2013          parseParens();
2014
2015        bool FollowedByNewline =
2016            CommentsBeforeNextToken.empty()
2017                ? FormatTok->NewlinesBefore > 0
2018                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2019
2020        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2021            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2022          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2023            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2024          addUnwrappedLine();
2025          return;
2026        }
2027      }
2028      break;
2029    }
2030    case tok::equal:
2031      if ((Style.isJavaScript() || Style.isCSharp()) &&
2032          FormatTok->is(TT_FatArrow)) {
2033        tryToParseChildBlock();
2034        break;
2035      }
2036
2037      nextToken();
2038      if (FormatTok->is(tok::l_brace)) {
2039        // Block kind should probably be set to BK_BracedInit for any language.
2040        // C# needs this change to ensure that array initialisers and object
2041        // initialisers are indented the same way.
2042        if (Style.isCSharp())
2043          FormatTok->setBlockKind(BK_BracedInit);
2044        // TableGen's defset statement has syntax of the form,
2045        // `defset <type> <name> = { <statement>... }`
2046        if (Style.isTableGen() &&
2047            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2048          FormatTok->setFinalizedType(TT_FunctionLBrace);
2049          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2050                     /*MunchSemi=*/false);
2051          addUnwrappedLine();
2052          break;
2053        }
2054        nextToken();
2055        parseBracedList();
2056      } else if (Style.Language == FormatStyle::LK_Proto &&
2057                 FormatTok->is(tok::less)) {
2058        nextToken();
2059        parseBracedList(/*IsAngleBracket=*/true);
2060      }
2061      break;
2062    case tok::l_square:
2063      parseSquare();
2064      break;
2065    case tok::kw_new:
2066      parseNew();
2067      break;
2068    case tok::kw_case:
2069      // Proto: there are no switch/case statements.
2070      if (Style.Language == FormatStyle::LK_Proto) {
2071        nextToken();
2072        return;
2073      }
2074      // In Verilog switch is called case.
2075      if (Style.isVerilog()) {
2076        parseBlock();
2077        addUnwrappedLine();
2078        return;
2079      }
2080      if (Style.isJavaScript() && Line->MustBeDeclaration) {
2081        // 'case: string' field declaration.
2082        nextToken();
2083        break;
2084      }
2085      parseCaseLabel();
2086      break;
2087    case tok::kw_default:
2088      nextToken();
2089      if (Style.isVerilog()) {
2090        if (FormatTok->is(tok::colon)) {
2091          // The label will be handled in the next iteration.
2092          break;
2093        }
2094        if (FormatTok->is(Keywords.kw_clocking)) {
2095          // A default clocking block.
2096          parseBlock();
2097          addUnwrappedLine();
2098          return;
2099        }
2100        parseVerilogCaseLabel();
2101        return;
2102      }
2103      break;
2104    case tok::colon:
2105      nextToken();
2106      if (Style.isVerilog()) {
2107        parseVerilogCaseLabel();
2108        return;
2109      }
2110      break;
2111    default:
2112      nextToken();
2113      break;
2114    }
2115  } while (!eof());
2116}
2117
2118bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2119  assert(FormatTok->is(tok::l_brace));
2120  if (!Style.isCSharp())
2121    return false;
2122  // See if it's a property accessor.
2123  if (FormatTok->Previous->isNot(tok::identifier))
2124    return false;
2125
2126  // See if we are inside a property accessor.
2127  //
2128  // Record the current tokenPosition so that we can advance and
2129  // reset the current token. `Next` is not set yet so we need
2130  // another way to advance along the token stream.
2131  unsigned int StoredPosition = Tokens->getPosition();
2132  FormatToken *Tok = Tokens->getNextToken();
2133
2134  // A trivial property accessor is of the form:
2135  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2136  // Track these as they do not require line breaks to be introduced.
2137  bool HasSpecialAccessor = false;
2138  bool IsTrivialPropertyAccessor = true;
2139  while (!eof()) {
2140    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2141                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2142                     Keywords.kw_init, Keywords.kw_set)) {
2143      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2144        HasSpecialAccessor = true;
2145      Tok = Tokens->getNextToken();
2146      continue;
2147    }
2148    if (Tok->isNot(tok::r_brace))
2149      IsTrivialPropertyAccessor = false;
2150    break;
2151  }
2152
2153  if (!HasSpecialAccessor) {
2154    Tokens->setPosition(StoredPosition);
2155    return false;
2156  }
2157
2158  // Try to parse the property accessor:
2159  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2160  Tokens->setPosition(StoredPosition);
2161  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2162    addUnwrappedLine();
2163  nextToken();
2164  do {
2165    switch (FormatTok->Tok.getKind()) {
2166    case tok::r_brace:
2167      nextToken();
2168      if (FormatTok->is(tok::equal)) {
2169        while (!eof() && FormatTok->isNot(tok::semi))
2170          nextToken();
2171        nextToken();
2172      }
2173      addUnwrappedLine();
2174      return true;
2175    case tok::l_brace:
2176      ++Line->Level;
2177      parseBlock(/*MustBeDeclaration=*/true);
2178      addUnwrappedLine();
2179      --Line->Level;
2180      break;
2181    case tok::equal:
2182      if (FormatTok->is(TT_FatArrow)) {
2183        ++Line->Level;
2184        do {
2185          nextToken();
2186        } while (!eof() && FormatTok->isNot(tok::semi));
2187        nextToken();
2188        addUnwrappedLine();
2189        --Line->Level;
2190        break;
2191      }
2192      nextToken();
2193      break;
2194    default:
2195      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2196                             Keywords.kw_set) &&
2197          !IsTrivialPropertyAccessor) {
2198        // Non-trivial get/set needs to be on its own line.
2199        addUnwrappedLine();
2200      }
2201      nextToken();
2202    }
2203  } while (!eof());
2204
2205  // Unreachable for well-formed code (paired '{' and '}').
2206  return true;
2207}
2208
2209bool UnwrappedLineParser::tryToParseLambda() {
2210  assert(FormatTok->is(tok::l_square));
2211  if (!Style.isCpp()) {
2212    nextToken();
2213    return false;
2214  }
2215  FormatToken &LSquare = *FormatTok;
2216  if (!tryToParseLambdaIntroducer())
2217    return false;
2218
2219  bool SeenArrow = false;
2220  bool InTemplateParameterList = false;
2221
2222  while (FormatTok->isNot(tok::l_brace)) {
2223    if (FormatTok->isSimpleTypeSpecifier()) {
2224      nextToken();
2225      continue;
2226    }
2227    switch (FormatTok->Tok.getKind()) {
2228    case tok::l_brace:
2229      break;
2230    case tok::l_paren:
2231      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2232      break;
2233    case tok::l_square:
2234      parseSquare();
2235      break;
2236    case tok::less:
2237      assert(FormatTok->Previous);
2238      if (FormatTok->Previous->is(tok::r_square))
2239        InTemplateParameterList = true;
2240      nextToken();
2241      break;
2242    case tok::kw_auto:
2243    case tok::kw_class:
2244    case tok::kw_template:
2245    case tok::kw_typename:
2246    case tok::amp:
2247    case tok::star:
2248    case tok::kw_const:
2249    case tok::kw_constexpr:
2250    case tok::kw_consteval:
2251    case tok::comma:
2252    case tok::greater:
2253    case tok::identifier:
2254    case tok::numeric_constant:
2255    case tok::coloncolon:
2256    case tok::kw_mutable:
2257    case tok::kw_noexcept:
2258    case tok::kw_static:
2259      nextToken();
2260      break;
2261    // Specialization of a template with an integer parameter can contain
2262    // arithmetic, logical, comparison and ternary operators.
2263    //
2264    // FIXME: This also accepts sequences of operators that are not in the scope
2265    // of a template argument list.
2266    //
2267    // In a C++ lambda a template type can only occur after an arrow. We use
2268    // this as an heuristic to distinguish between Objective-C expressions
2269    // followed by an `a->b` expression, such as:
2270    // ([obj func:arg] + a->b)
2271    // Otherwise the code below would parse as a lambda.
2272    case tok::plus:
2273    case tok::minus:
2274    case tok::exclaim:
2275    case tok::tilde:
2276    case tok::slash:
2277    case tok::percent:
2278    case tok::lessless:
2279    case tok::pipe:
2280    case tok::pipepipe:
2281    case tok::ampamp:
2282    case tok::caret:
2283    case tok::equalequal:
2284    case tok::exclaimequal:
2285    case tok::greaterequal:
2286    case tok::lessequal:
2287    case tok::question:
2288    case tok::colon:
2289    case tok::ellipsis:
2290    case tok::kw_true:
2291    case tok::kw_false:
2292      if (SeenArrow || InTemplateParameterList) {
2293        nextToken();
2294        break;
2295      }
2296      return true;
2297    case tok::arrow:
2298      // This might or might not actually be a lambda arrow (this could be an
2299      // ObjC method invocation followed by a dereferencing arrow). We might
2300      // reset this back to TT_Unknown in TokenAnnotator.
2301      FormatTok->setFinalizedType(TT_TrailingReturnArrow);
2302      SeenArrow = true;
2303      nextToken();
2304      break;
2305    case tok::kw_requires: {
2306      auto *RequiresToken = FormatTok;
2307      nextToken();
2308      parseRequiresClause(RequiresToken);
2309      break;
2310    }
2311    case tok::equal:
2312      if (!InTemplateParameterList)
2313        return true;
2314      nextToken();
2315      break;
2316    default:
2317      return true;
2318    }
2319  }
2320
2321  FormatTok->setFinalizedType(TT_LambdaLBrace);
2322  LSquare.setFinalizedType(TT_LambdaLSquare);
2323
2324  NestedLambdas.push_back(Line->SeenDecltypeAuto);
2325  parseChildBlock();
2326  assert(!NestedLambdas.empty());
2327  NestedLambdas.pop_back();
2328
2329  return true;
2330}
2331
2332bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2333  const FormatToken *Previous = FormatTok->Previous;
2334  const FormatToken *LeftSquare = FormatTok;
2335  nextToken();
2336  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2337                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2338                                        tok::kw_co_yield, tok::kw_co_return)) ||
2339                    Previous->closesScope())) ||
2340      LeftSquare->isCppStructuredBinding(Style)) {
2341    return false;
2342  }
2343  if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2344    return false;
2345  if (FormatTok->is(tok::r_square)) {
2346    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2347    if (Next->is(tok::greater))
2348      return false;
2349  }
2350  parseSquare(/*LambdaIntroducer=*/true);
2351  return true;
2352}
2353
2354void UnwrappedLineParser::tryToParseJSFunction() {
2355  assert(FormatTok->is(Keywords.kw_function));
2356  if (FormatTok->is(Keywords.kw_async))
2357    nextToken();
2358  // Consume "function".
2359  nextToken();
2360
2361  // Consume * (generator function). Treat it like C++'s overloaded operators.
2362  if (FormatTok->is(tok::star)) {
2363    FormatTok->setFinalizedType(TT_OverloadedOperator);
2364    nextToken();
2365  }
2366
2367  // Consume function name.
2368  if (FormatTok->is(tok::identifier))
2369    nextToken();
2370
2371  if (FormatTok->isNot(tok::l_paren))
2372    return;
2373
2374  // Parse formal parameter list.
2375  parseParens();
2376
2377  if (FormatTok->is(tok::colon)) {
2378    // Parse a type definition.
2379    nextToken();
2380
2381    // Eat the type declaration. For braced inline object types, balance braces,
2382    // otherwise just parse until finding an l_brace for the function body.
2383    if (FormatTok->is(tok::l_brace))
2384      tryToParseBracedList();
2385    else
2386      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2387        nextToken();
2388  }
2389
2390  if (FormatTok->is(tok::semi))
2391    return;
2392
2393  parseChildBlock();
2394}
2395
2396bool UnwrappedLineParser::tryToParseBracedList() {
2397  if (FormatTok->is(BK_Unknown))
2398    calculateBraceTypes();
2399  assert(FormatTok->isNot(BK_Unknown));
2400  if (FormatTok->is(BK_Block))
2401    return false;
2402  nextToken();
2403  parseBracedList();
2404  return true;
2405}
2406
2407bool UnwrappedLineParser::tryToParseChildBlock() {
2408  assert(Style.isJavaScript() || Style.isCSharp());
2409  assert(FormatTok->is(TT_FatArrow));
2410  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2411  // They always start an expression or a child block if followed by a curly
2412  // brace.
2413  nextToken();
2414  if (FormatTok->isNot(tok::l_brace))
2415    return false;
2416  parseChildBlock();
2417  return true;
2418}
2419
2420bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2421  bool HasError = false;
2422
2423  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2424  // replace this by using parseAssignmentExpression() inside.
2425  do {
2426    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2427        tryToParseChildBlock()) {
2428      continue;
2429    }
2430    if (Style.isJavaScript()) {
2431      if (FormatTok->is(Keywords.kw_function)) {
2432        tryToParseJSFunction();
2433        continue;
2434      }
2435      if (FormatTok->is(tok::l_brace)) {
2436        // Could be a method inside of a braced list `{a() { return 1; }}`.
2437        if (tryToParseBracedList())
2438          continue;
2439        parseChildBlock();
2440      }
2441    }
2442    if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2443      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2444        addUnwrappedLine();
2445      nextToken();
2446      return !HasError;
2447    }
2448    switch (FormatTok->Tok.getKind()) {
2449    case tok::l_square:
2450      if (Style.isCSharp())
2451        parseSquare();
2452      else
2453        tryToParseLambda();
2454      break;
2455    case tok::l_paren:
2456      parseParens();
2457      // JavaScript can just have free standing methods and getters/setters in
2458      // object literals. Detect them by a "{" following ")".
2459      if (Style.isJavaScript()) {
2460        if (FormatTok->is(tok::l_brace))
2461          parseChildBlock();
2462        break;
2463      }
2464      break;
2465    case tok::l_brace:
2466      // Assume there are no blocks inside a braced init list apart
2467      // from the ones we explicitly parse out (like lambdas).
2468      FormatTok->setBlockKind(BK_BracedInit);
2469      nextToken();
2470      parseBracedList();
2471      break;
2472    case tok::less:
2473      nextToken();
2474      if (IsAngleBracket)
2475        parseBracedList(/*IsAngleBracket=*/true);
2476      break;
2477    case tok::semi:
2478      // JavaScript (or more precisely TypeScript) can have semicolons in braced
2479      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2480      // used for error recovery if we have otherwise determined that this is
2481      // a braced list.
2482      if (Style.isJavaScript()) {
2483        nextToken();
2484        break;
2485      }
2486      HasError = true;
2487      if (!IsEnum)
2488        return false;
2489      nextToken();
2490      break;
2491    case tok::comma:
2492      nextToken();
2493      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2494        addUnwrappedLine();
2495      break;
2496    default:
2497      nextToken();
2498      break;
2499    }
2500  } while (!eof());
2501  return false;
2502}
2503
2504/// \brief Parses a pair of parentheses (and everything between them).
2505/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2506/// double ampersands. This applies for all nested scopes as well.
2507///
2508/// Returns whether there is a `=` token between the parentheses.
2509bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2510  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2511  auto *LeftParen = FormatTok;
2512  bool SeenEqual = false;
2513  bool MightBeFoldExpr = false;
2514  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2515  nextToken();
2516  do {
2517    switch (FormatTok->Tok.getKind()) {
2518    case tok::l_paren:
2519      if (parseParens(AmpAmpTokenType))
2520        SeenEqual = true;
2521      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2522        parseChildBlock();
2523      break;
2524    case tok::r_paren:
2525      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2526          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2527        const auto *Prev = LeftParen->Previous;
2528        const auto *Next = Tokens->peekNextToken();
2529        const bool DoubleParens =
2530            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2531        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2532        const bool Blacklisted =
2533            PrevPrev &&
2534            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2535             (SeenEqual &&
2536              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2537               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2538        const bool ReturnParens =
2539            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2540            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2541             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2542            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2543            Next->is(tok::semi);
2544        if ((DoubleParens && !Blacklisted) || ReturnParens) {
2545          LeftParen->Optional = true;
2546          FormatTok->Optional = true;
2547        }
2548      }
2549      nextToken();
2550      return SeenEqual;
2551    case tok::r_brace:
2552      // A "}" inside parenthesis is an error if there wasn't a matching "{".
2553      return SeenEqual;
2554    case tok::l_square:
2555      tryToParseLambda();
2556      break;
2557    case tok::l_brace:
2558      if (!tryToParseBracedList())
2559        parseChildBlock();
2560      break;
2561    case tok::at:
2562      nextToken();
2563      if (FormatTok->is(tok::l_brace)) {
2564        nextToken();
2565        parseBracedList();
2566      }
2567      break;
2568    case tok::ellipsis:
2569      MightBeFoldExpr = true;
2570      nextToken();
2571      break;
2572    case tok::equal:
2573      SeenEqual = true;
2574      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2575        tryToParseChildBlock();
2576      else
2577        nextToken();
2578      break;
2579    case tok::kw_class:
2580      if (Style.isJavaScript())
2581        parseRecord(/*ParseAsExpr=*/true);
2582      else
2583        nextToken();
2584      break;
2585    case tok::identifier:
2586      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2587        tryToParseJSFunction();
2588      else
2589        nextToken();
2590      break;
2591    case tok::kw_requires: {
2592      auto RequiresToken = FormatTok;
2593      nextToken();
2594      parseRequiresExpression(RequiresToken);
2595      break;
2596    }
2597    case tok::ampamp:
2598      if (AmpAmpTokenType != TT_Unknown)
2599        FormatTok->setFinalizedType(AmpAmpTokenType);
2600      [[fallthrough]];
2601    default:
2602      nextToken();
2603      break;
2604    }
2605  } while (!eof());
2606  return SeenEqual;
2607}
2608
2609void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2610  if (!LambdaIntroducer) {
2611    assert(FormatTok->is(tok::l_square) && "'[' expected.");
2612    if (tryToParseLambda())
2613      return;
2614  }
2615  do {
2616    switch (FormatTok->Tok.getKind()) {
2617    case tok::l_paren:
2618      parseParens();
2619      break;
2620    case tok::r_square:
2621      nextToken();
2622      return;
2623    case tok::r_brace:
2624      // A "}" inside parenthesis is an error if there wasn't a matching "{".
2625      return;
2626    case tok::l_square:
2627      parseSquare();
2628      break;
2629    case tok::l_brace: {
2630      if (!tryToParseBracedList())
2631        parseChildBlock();
2632      break;
2633    }
2634    case tok::at:
2635      nextToken();
2636      if (FormatTok->is(tok::l_brace)) {
2637        nextToken();
2638        parseBracedList();
2639      }
2640      break;
2641    default:
2642      nextToken();
2643      break;
2644    }
2645  } while (!eof());
2646}
2647
2648void UnwrappedLineParser::keepAncestorBraces() {
2649  if (!Style.RemoveBracesLLVM)
2650    return;
2651
2652  const int MaxNestingLevels = 2;
2653  const int Size = NestedTooDeep.size();
2654  if (Size >= MaxNestingLevels)
2655    NestedTooDeep[Size - MaxNestingLevels] = true;
2656  NestedTooDeep.push_back(false);
2657}
2658
2659static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2660  for (const auto &Token : llvm::reverse(Line.Tokens))
2661    if (Token.Tok->isNot(tok::comment))
2662      return Token.Tok;
2663
2664  return nullptr;
2665}
2666
/// Parses the single statement that forms the body of a control statement
/// written without braces.
///
/// The pending line is finished and the body is parsed one indentation level
/// deeper via parseStructuralElement(). When Style.InsertBraces applies, the
/// BraceCount of the token in front of the body and of the last token of the
/// body is adjusted.
///
/// \param CheckEOF if true, an unwrapped line is added when the end of the
/// input is reached right after the body.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  // Brace insertion is only considered outside of preprocessor directives,
  // for a non-empty header line, and when the body is not just a `;`.
  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
    // Tag the last non-comment token of the header line when braces are never
    // wrapped after control statements; otherwise tag the line's last token.
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
              ? getLastNonComment(*Line)
              : Line->Tokens.back().Tok;
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // The token is already tagged; clearing Tok skips the matching
      // adjustment after the body.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      // NOTE(review): -1 presumably marks the spot for an inserted opening
      // brace; confirm against the consumer of BraceCount.
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recent line outside of a preprocessor directive that has
    // a non-comment token, and take that line's last token.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    // NOTE(review): presumably counts the closing braces to insert after this
    // token; confirm against the consumer of BraceCount.
    ++Tok->BraceCount;
  }

  if (CheckEOF && eof())
    addUnwrappedLine();

  --Line->Level;
}
2706
2707static void markOptionalBraces(FormatToken *LeftBrace) {
2708  if (!LeftBrace)
2709    return;
2710
2711  assert(LeftBrace->is(tok::l_brace));
2712
2713  FormatToken *RightBrace = LeftBrace->MatchingParen;
2714  if (!RightBrace) {
2715    assert(!LeftBrace->Optional);
2716    return;
2717  }
2718
2719  assert(RightBrace->is(tok::r_brace));
2720  assert(RightBrace->MatchingParen == LeftBrace);
2721  assert(LeftBrace->Optional == RightBrace->Optional);
2722
2723  LeftBrace->Optional = true;
2724  RightBrace->Optional = true;
2725}
2726
2727void UnwrappedLineParser::handleAttributes() {
2728  // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2729  if (FormatTok->isAttribute())
2730    nextToken();
2731  else if (FormatTok->is(tok::l_square))
2732    handleCppAttributes();
2733}
2734
2735bool UnwrappedLineParser::handleCppAttributes() {
2736  // Handle [[likely]] / [[unlikely]] attributes.
2737  assert(FormatTok->is(tok::l_square));
2738  if (!tryToParseSimpleAttribute())
2739    return false;
2740  parseSquare();
2741  return true;
2742}
2743
2744/// Returns whether \c Tok begins a block.
2745bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2746  // FIXME: rename the function or make
2747  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2748  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2749                           : Tok.is(tok::l_brace);
2750}
2751
/// Parses an `if` statement starting at the `if` keyword, including any
/// `else` / `else if` branches.
///
/// When formatting Verilog, the statement may instead be introduced by
/// `restrict`, `assert`, `assume` or `cover` (see the assertion below).
///
/// \param IfKind if non-null, receives the kind of the parsed statement
/// (IfOnly, IfElse or IfElseIf). It is only written when
/// Style.RemoveBracesLLVM is set and the function does not return early.
/// \param KeepBraces if true, the braces of this statement are not considered
/// for removal.
/// \param IsVerilogAssert if true, the Verilog assert specific syntax
/// (`assert #0`, `assert final`, an optional then-action) is accepted.
/// \returns the left brace of the `if` body (nullptr if the body has none)
/// when Style.RemoveBracesLLVM is set; nullptr otherwise, and also for a
/// Verilog assert without an action.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // The braces of `if consteval` are always kept; for every other form the
  // decision depends on the style option and the caller's request.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or an identifier between `if` and the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  // Parse the `if` body: a block, nothing at all (Verilog assert directly
  // followed by `else`), or a single unbraced statement.
  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Also keep the braces if the block's brace pair was not linked up, if
    // the nesting is too deep, or if the body is an `if` of kind IfOnly or
    // IfElseIf.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else branch with a cleared too-deep flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // Remember whether the else block starts with an `if` before the block
      // is consumed.
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // The nested `if` keeps its braces; the braces of the else block
        // around it are marked optional.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on a line of its own,
      // one level deeper.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The chained `if` is parsed without this statement's NestedTooDeep
        // entry on the stack; the entry is restored afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else branch: keep the braces if the body is an if-else statement.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    // Braces are only marked optional if both branches allow it.
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the brace pair of the `if` block.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2924
2925void UnwrappedLineParser::parseTryCatch() {
2926  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2927  nextToken();
2928  bool NeedsUnwrappedLine = false;
2929  if (FormatTok->is(tok::colon)) {
2930    // We are in a function try block, what comes is an initializer list.
2931    nextToken();
2932
2933    // In case identifiers were removed by clang-tidy, what might follow is
2934    // multiple commas in sequence - before the first identifier.
2935    while (FormatTok->is(tok::comma))
2936      nextToken();
2937
2938    while (FormatTok->is(tok::identifier)) {
2939      nextToken();
2940      if (FormatTok->is(tok::l_paren))
2941        parseParens();
2942      if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2943          FormatTok->is(tok::l_brace)) {
2944        do {
2945          nextToken();
2946        } while (FormatTok->isNot(tok::r_brace));
2947        nextToken();
2948      }
2949
2950      // In case identifiers were removed by clang-tidy, what might follow is
2951      // multiple commas in sequence - after the first identifier.
2952      while (FormatTok->is(tok::comma))
2953        nextToken();
2954    }
2955  }
2956  // Parse try with resource.
2957  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2958    parseParens();
2959
2960  keepAncestorBraces();
2961
2962  if (FormatTok->is(tok::l_brace)) {
2963    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2964    parseBlock();
2965    if (Style.BraceWrapping.BeforeCatch)
2966      addUnwrappedLine();
2967    else
2968      NeedsUnwrappedLine = true;
2969  } else if (FormatTok->isNot(tok::kw_catch)) {
2970    // The C++ standard requires a compound-statement after a try.
2971    // If there's none, we try to assume there's a structuralElement
2972    // and try to continue.
2973    addUnwrappedLine();
2974    ++Line->Level;
2975    parseStructuralElement();
2976    --Line->Level;
2977  }
2978  while (true) {
2979    if (FormatTok->is(tok::at))
2980      nextToken();
2981    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2982                             tok::kw___finally) ||
2983          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2984           FormatTok->is(Keywords.kw_finally)) ||
2985          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2986           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2987      break;
2988    }
2989    nextToken();
2990    while (FormatTok->isNot(tok::l_brace)) {
2991      if (FormatTok->is(tok::l_paren)) {
2992        parseParens();
2993        continue;
2994      }
2995      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2996        if (Style.RemoveBracesLLVM)
2997          NestedTooDeep.pop_back();
2998        return;
2999      }
3000      nextToken();
3001    }
3002    NeedsUnwrappedLine = false;
3003    Line->MustBeDeclaration = false;
3004    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3005    parseBlock();
3006    if (Style.BraceWrapping.BeforeCatch)
3007      addUnwrappedLine();
3008    else
3009      NeedsUnwrappedLine = true;
3010  }
3011
3012  if (Style.RemoveBracesLLVM)
3013    NestedTooDeep.pop_back();
3014
3015  if (NeedsUnwrappedLine)
3016    addUnwrappedLine();
3017}
3018
3019void UnwrappedLineParser::parseNamespace() {
3020  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3021         "'namespace' expected");
3022
3023  const FormatToken &InitialToken = *FormatTok;
3024  nextToken();
3025  if (InitialToken.is(TT_NamespaceMacro)) {
3026    parseParens();
3027  } else {
3028    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3029                              tok::l_square, tok::period, tok::l_paren) ||
3030           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3031      if (FormatTok->is(tok::l_square))
3032        parseSquare();
3033      else if (FormatTok->is(tok::l_paren))
3034        parseParens();
3035      else
3036        nextToken();
3037    }
3038  }
3039  if (FormatTok->is(tok::l_brace)) {
3040    FormatTok->setFinalizedType(TT_NamespaceLBrace);
3041
3042    if (ShouldBreakBeforeBrace(Style, InitialToken))
3043      addUnwrappedLine();
3044
3045    unsigned AddLevels =
3046        Style.NamespaceIndentation == FormatStyle::NI_All ||
3047                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3048                 DeclarationScopeStack.size() > 1)
3049            ? 1u
3050            : 0u;
3051    bool ManageWhitesmithsBraces =
3052        AddLevels == 0u &&
3053        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3054
3055    // If we're in Whitesmiths mode, indent the brace if we're not indenting
3056    // the whole block.
3057    if (ManageWhitesmithsBraces)
3058      ++Line->Level;
3059
3060    // Munch the semicolon after a namespace. This is more common than one would
3061    // think. Putting the semicolon into its own line is very ugly.
3062    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3063               /*KeepBraces=*/true, /*IfKind=*/nullptr,
3064               ManageWhitesmithsBraces);
3065
3066    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3067
3068    if (ManageWhitesmithsBraces)
3069      --Line->Level;
3070  }
3071  // FIXME: Add error handling.
3072}
3073
3074void UnwrappedLineParser::parseNew() {
3075  assert(FormatTok->is(tok::kw_new) && "'new' expected");
3076  nextToken();
3077
3078  if (Style.isCSharp()) {
3079    do {
3080      // Handle constructor invocation, e.g. `new(field: value)`.
3081      if (FormatTok->is(tok::l_paren))
3082        parseParens();
3083
3084      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3085      if (FormatTok->is(tok::l_brace))
3086        parseBracedList();
3087
3088      if (FormatTok->isOneOf(tok::semi, tok::comma))
3089        return;
3090
3091      nextToken();
3092    } while (!eof());
3093  }
3094
3095  if (Style.Language != FormatStyle::LK_Java)
3096    return;
3097
3098  // In Java, we can parse everything up to the parens, which aren't optional.
3099  do {
3100    // There should not be a ;, { or } before the new's open paren.
3101    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3102      return;
3103
3104    // Consume the parens.
3105    if (FormatTok->is(tok::l_paren)) {
3106      parseParens();
3107
3108      // If there is a class body of an anonymous class, consume that as child.
3109      if (FormatTok->is(tok::l_brace))
3110        parseChildBlock();
3111      return;
3112    }
3113    nextToken();
3114  } while (!eof());
3115}
3116
3117void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3118  keepAncestorBraces();
3119
3120  if (isBlockBegin(*FormatTok)) {
3121    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3122    FormatToken *LeftBrace = FormatTok;
3123    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3124    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3125               /*MunchSemi=*/true, KeepBraces);
3126    setPreviousRBraceType(TT_ControlStatementRBrace);
3127    if (!KeepBraces) {
3128      assert(!NestedTooDeep.empty());
3129      if (!NestedTooDeep.back())
3130        markOptionalBraces(LeftBrace);
3131    }
3132    if (WrapRightBrace)
3133      addUnwrappedLine();
3134  } else {
3135    parseUnbracedBody();
3136  }
3137
3138  if (!KeepBraces)
3139    NestedTooDeep.pop_back();
3140}
3141
3142void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3143  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3144          (Style.isVerilog() &&
3145           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3146                              Keywords.kw_always_ff, Keywords.kw_always_latch,
3147                              Keywords.kw_final, Keywords.kw_initial,
3148                              Keywords.kw_foreach, Keywords.kw_forever,
3149                              Keywords.kw_repeat))) &&
3150         "'for', 'while' or foreach macro expected");
3151  const bool KeepBraces = !Style.RemoveBracesLLVM ||
3152                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3153
3154  nextToken();
3155  // JS' for await ( ...
3156  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3157    nextToken();
3158  if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3159    nextToken();
3160  if (HasParens && FormatTok->is(tok::l_paren)) {
3161    // The type is only set for Verilog basically because we were afraid to
3162    // change the existing behavior for loops. See the discussion on D121756 for
3163    // details.
3164    if (Style.isVerilog())
3165      FormatTok->setFinalizedType(TT_ConditionLParen);
3166    parseParens();
3167  }
3168
3169  if (Style.isVerilog()) {
3170    // Event control.
3171    parseVerilogSensitivityList();
3172  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3173             Tokens->getPreviousToken()->is(tok::r_paren)) {
3174    nextToken();
3175    addUnwrappedLine();
3176    return;
3177  }
3178
3179  handleAttributes();
3180  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3181}
3182
3183void UnwrappedLineParser::parseDoWhile() {
3184  assert(FormatTok->is(tok::kw_do) && "'do' expected");
3185  nextToken();
3186
3187  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3188
3189  // FIXME: Add error handling.
3190  if (FormatTok->isNot(tok::kw_while)) {
3191    addUnwrappedLine();
3192    return;
3193  }
3194
3195  FormatTok->setFinalizedType(TT_DoWhile);
3196
3197  // If in Whitesmiths mode, the line with the while() needs to be indented
3198  // to the same level as the block.
3199  if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3200    ++Line->Level;
3201
3202  nextToken();
3203  parseStructuralElement();
3204}
3205
3206void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3207  nextToken();
3208  unsigned OldLineLevel = Line->Level;
3209  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3210    --Line->Level;
3211  if (LeftAlignLabel)
3212    Line->Level = 0;
3213
3214  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3215      FormatTok->is(tok::l_brace)) {
3216
3217    CompoundStatementIndenter Indenter(this, Line->Level,
3218                                       Style.BraceWrapping.AfterCaseLabel,
3219                                       Style.BraceWrapping.IndentBraces);
3220    parseBlock();
3221    if (FormatTok->is(tok::kw_break)) {
3222      if (Style.BraceWrapping.AfterControlStatement ==
3223          FormatStyle::BWACS_Always) {
3224        addUnwrappedLine();
3225        if (!Style.IndentCaseBlocks &&
3226            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3227          ++Line->Level;
3228        }
3229      }
3230      parseStructuralElement();
3231    }
3232    addUnwrappedLine();
3233  } else {
3234    if (FormatTok->is(tok::semi))
3235      nextToken();
3236    addUnwrappedLine();
3237  }
3238  Line->Level = OldLineLevel;
3239  if (FormatTok->isNot(tok::l_brace)) {
3240    parseStructuralElement();
3241    addUnwrappedLine();
3242  }
3243}
3244
3245void UnwrappedLineParser::parseCaseLabel() {
3246  assert(FormatTok->is(tok::kw_case) && "'case' expected");
3247
3248  // FIXME: fix handling of complex expressions here.
3249  do {
3250    nextToken();
3251    if (FormatTok->is(tok::colon)) {
3252      FormatTok->setFinalizedType(TT_CaseLabelColon);
3253      break;
3254    }
3255  } while (!eof());
3256  parseLabel();
3257}
3258
3259void UnwrappedLineParser::parseSwitch() {
3260  assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3261  nextToken();
3262  if (FormatTok->is(tok::l_paren))
3263    parseParens();
3264
3265  keepAncestorBraces();
3266
3267  if (FormatTok->is(tok::l_brace)) {
3268    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3269    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3270    parseBlock();
3271    setPreviousRBraceType(TT_ControlStatementRBrace);
3272    addUnwrappedLine();
3273  } else {
3274    addUnwrappedLine();
3275    ++Line->Level;
3276    parseStructuralElement();
3277    --Line->Level;
3278  }
3279
3280  if (Style.RemoveBracesLLVM)
3281    NestedTooDeep.pop_back();
3282}
3283
3284// Operators that can follow a C variable.
3285static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3286  switch (kind) {
3287  case tok::ampamp:
3288  case tok::ampequal:
3289  case tok::arrow:
3290  case tok::caret:
3291  case tok::caretequal:
3292  case tok::comma:
3293  case tok::ellipsis:
3294  case tok::equal:
3295  case tok::equalequal:
3296  case tok::exclaim:
3297  case tok::exclaimequal:
3298  case tok::greater:
3299  case tok::greaterequal:
3300  case tok::greatergreater:
3301  case tok::greatergreaterequal:
3302  case tok::l_paren:
3303  case tok::l_square:
3304  case tok::less:
3305  case tok::lessequal:
3306  case tok::lessless:
3307  case tok::lesslessequal:
3308  case tok::minus:
3309  case tok::minusequal:
3310  case tok::minusminus:
3311  case tok::percent:
3312  case tok::percentequal:
3313  case tok::period:
3314  case tok::pipe:
3315  case tok::pipeequal:
3316  case tok::pipepipe:
3317  case tok::plus:
3318  case tok::plusequal:
3319  case tok::plusplus:
3320  case tok::question:
3321  case tok::r_brace:
3322  case tok::r_paren:
3323  case tok::r_square:
3324  case tok::semi:
3325  case tok::slash:
3326  case tok::slashequal:
3327  case tok::star:
3328  case tok::starequal:
3329    return true;
3330  default:
3331    return false;
3332  }
3333}
3334
3335void UnwrappedLineParser::parseAccessSpecifier() {
3336  FormatToken *AccessSpecifierCandidate = FormatTok;
3337  nextToken();
3338  // Understand Qt's slots.
3339  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3340    nextToken();
3341  // Otherwise, we don't know what it is, and we'd better keep the next token.
3342  if (FormatTok->is(tok::colon)) {
3343    nextToken();
3344    addUnwrappedLine();
3345  } else if (FormatTok->isNot(tok::coloncolon) &&
3346             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3347    // Not a variable name nor namespace name.
3348    addUnwrappedLine();
3349  } else if (AccessSpecifierCandidate) {
3350    // Consider the access specifier to be a C identifier.
3351    AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3352  }
3353}
3354
/// \brief Parses a requires, decides if it is a clause or an expression.
///
/// The decision is a heuristic: first the token following the keyword is
/// checked, then the token in front of it, and finally up to 50 tokens
/// after the opening parenthesis are inspected to see whether the parentheses
/// contain a parameter list. The token source is rewound after that
/// lookahead, before the actual parsing starts.
///
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided; fall through to the lookahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are, so that the token source can be rewound once the
  // decision has been made.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  // Advances to the next token and counts how far we have looked ahead.
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Outside of angle brackets these are taken as part of a parameter
      // list, so this is an expression.
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::r_paren:
    case tok::pipepipe:
      // Reaching one of these without having seen a parameter declaration
      // means a clause.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that is not part of a qualified name and not
      // inside angle brackets is taken as the parameter name after its type.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      // A simple type specifier is taken as the start of a parameter
      // declaration.
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3501
3502/// \brief Parses a requires clause.
3503/// \param RequiresToken The requires keyword token, which starts this clause.
3504/// \pre We need to be on the next token after the requires keyword.
3505/// \sa parseRequiresExpression
3506///
3507/// Returns if it either has finished parsing the clause, or it detects, that
3508/// the clause is incorrect.
3509void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3510  assert(FormatTok->getPreviousNonComment() == RequiresToken);
3511  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3512
3513  // If there is no previous token, we are within a requires expression,
3514  // otherwise we will always have the template or function declaration in front
3515  // of it.
3516  bool InRequiresExpression =
3517      !RequiresToken->Previous ||
3518      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3519
3520  RequiresToken->setFinalizedType(InRequiresExpression
3521                                      ? TT_RequiresClauseInARequiresExpression
3522                                      : TT_RequiresClause);
3523
3524  // NOTE: parseConstraintExpression is only ever called from this function.
3525  // It could be inlined into here.
3526  parseConstraintExpression();
3527
3528  if (!InRequiresExpression)
3529    FormatTok->Previous->ClosesRequiresClause = true;
3530}
3531
3532/// \brief Parses a requires expression.
3533/// \param RequiresToken The requires keyword token, which starts this clause.
3534/// \pre We need to be on the next token after the requires keyword.
3535/// \sa parseRequiresClause
3536///
3537/// Returns if it either has finished parsing the expression, or it detects,
3538/// that the expression is incorrect.
3539void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3540  assert(FormatTok->getPreviousNonComment() == RequiresToken);
3541  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3542
3543  RequiresToken->setFinalizedType(TT_RequiresExpression);
3544
3545  if (FormatTok->is(tok::l_paren)) {
3546    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3547    parseParens();
3548  }
3549
3550  if (FormatTok->is(tok::l_brace)) {
3551    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3552    parseChildBlock();
3553  }
3554}
3555
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns when the parsing is
/// complete, when a token is reached that cannot continue the expression, or
/// at the end of the input.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // The permission is consumed on every iteration; only the cases below that
    // explicitly set LambdaNextTimeAllowed re-enable it for the next token.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires expression; remember the keyword and parse the rest.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      // A second parenthesized group directly afterwards is not part of the
      // constraint.
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Logical connectives start a new operand: both a lambda and a
      // parenthesized group may follow.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These tokens carry the current lambda permission over unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // The cast keyword has to be followed by '<'; otherwise stop here.
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3703
/// Parses an enum starting at the current token (the `enum` keyword, or the
/// first token of an NS_ENUM-style construct).
///
/// \returns false if the tokens turn out not to start an enum (a JavaScript
/// property named `enum`, a protobuf field named `enum`, or, in C++, two
/// consecutive identifiers); true otherwise, including when no `{` follows
/// the enum header.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume the enum header: name, nested-name qualifiers, underlying type,
  // and anything else that may legitimately appear before the '{'.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    assert(FormatTok->isNot(TT_AttributeSquare));
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // The enumerators go on their own lines, one level deeper than the header.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list could not be parsed; end the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3792
3793bool UnwrappedLineParser::parseStructLike() {
3794  // parseRecord falls through and does not yet add an unwrapped line as a
3795  // record declaration or definition can start a structural element.
3796  parseRecord();
3797  // This does not apply to Java, JavaScript and C#.
3798  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3799      Style.isCSharp()) {
3800    if (FormatTok->is(tok::semi))
3801      nextToken();
3802    addUnwrappedLine();
3803    return true;
3804  }
3805  return false;
3806}
3807
3808namespace {
3809// A class used to set and restore the Token position when peeking
3810// ahead in the token source.
3811class ScopedTokenPosition {
3812  unsigned StoredPosition;
3813  FormatTokenSource *Tokens;
3814
3815public:
3816  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3817    assert(Tokens && "Tokens expected to not be null");
3818    StoredPosition = Tokens->getPosition();
3819  }
3820
3821  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3822};
3823} // namespace
3824
3825// Look to see if we have [[ by looking ahead, if
3826// its not then rewind to the original position.
3827bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3828  ScopedTokenPosition AutoPosition(Tokens);
3829  FormatToken *Tok = Tokens->getNextToken();
3830  // We already read the first [ check for the second.
3831  if (Tok->isNot(tok::l_square))
3832    return false;
3833  // Double check that the attribute is just something
3834  // fairly simple.
3835  while (Tok->isNot(tok::eof)) {
3836    if (Tok->is(tok::r_square))
3837      break;
3838    Tok = Tokens->getNextToken();
3839  }
3840  if (Tok->is(tok::eof))
3841    return false;
3842  Tok = Tokens->getNextToken();
3843  if (Tok->isNot(tok::r_square))
3844    return false;
3845  Tok = Tokens->getNextToken();
3846  if (Tok->is(tok::semi))
3847    return false;
3848  return true;
3849}
3850
/// Parses the body of a Java enum. The current token must be the opening '{'.
///
/// A look-ahead pass first classifies the enum as simple or complex. Simple
/// enums are parsed as a braced list; complex ones get one unwrapped line per
/// constant, followed by the remaining class body.
void UnwrappedLineParser::parseJavaEnumBody() {
  assert(FormatTok->is(tok::l_brace));
  const FormatToken *OpeningBrace = FormatTok;

  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
  unsigned StoredPosition = Tokens->getPosition();
  bool IsSimple = true;
  FormatToken *Tok = Tokens->getNextToken();
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_brace))
      break;
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
      IsSimple = false;
      break;
    }
    // FIXME: This will also mark enums with braces in the arguments to enum
    // constants as "not simple". This is probably fine in practice, though.
    Tok = Tokens->getNextToken();
  }
  // Rewind to where the look-ahead started.
  FormatTok = Tokens->setPosition(StoredPosition);

  if (IsSimple) {
    nextToken();
    parseBracedList();
    addUnwrappedLine();
    return;
  }

  // Parse the body of a more complex enum.
  // First add a line for everything up to the "{".
  nextToken();
  addUnwrappedLine();
  ++Line->Level;

  // Parse the enum constants.
  while (!eof()) {
    if (FormatTok->is(tok::l_brace)) {
      // Parse the constant's class body.
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
    } else if (FormatTok->is(tok::l_paren)) {
      parseParens();
    } else if (FormatTok->is(tok::comma)) {
      // Each constant ends its own line.
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->is(tok::semi)) {
      // The ';' terminates the list of constants.
      nextToken();
      addUnwrappedLine();
      break;
    } else if (FormatTok->is(tok::r_brace)) {
      // End of the enum without a ';'; the brace itself is not consumed here.
      addUnwrappedLine();
      break;
    } else {
      nextToken();
    }
  }

  // Parse the class body after the enum's ";" if any.
  parseLevel(OpeningBrace);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}
3916
/// Parses a record (class, struct, union, or another record-like construct
/// such as an interface), starting at its introducing keyword.
///
/// \param ParseAsExpr if true, the record body is parsed with
/// parseChildBlock(); otherwise it is parsed with parseBlock().
///
/// No unwrapped line is added after the record; see the comment at the end of
/// the function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // An identifier that is not spelled entirely in upper case is treated as a
  // real name rather than a macro.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier(FormatTok->Previous) &&
        FormatTok->is(tok::l_paren)) {
      parseParens();
    }
  }

  // Skip over whatever follows a ':' or '<' after the name, up to the body.
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(tok::greater))
        --AngleNestingLevel;

      // A '(' outside of angle brackets that follows a non-macro identifier
      // ends the scan.
      if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) &&
          IsNonMacroIdentifier(FormatTok->Previous)) {
        break;
      }
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record's introducing keyword to the token types used for its
  // opening and closing brace.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
4027
4028void UnwrappedLineParser::parseObjCMethod() {
4029  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4030         "'(' or identifier expected.");
4031  do {
4032    if (FormatTok->is(tok::semi)) {
4033      nextToken();
4034      addUnwrappedLine();
4035      return;
4036    } else if (FormatTok->is(tok::l_brace)) {
4037      if (Style.BraceWrapping.AfterFunction)
4038        addUnwrappedLine();
4039      parseBlock();
4040      addUnwrappedLine();
4041      return;
4042    } else {
4043      nextToken();
4044    }
4045  } while (!eof());
4046}
4047
4048void UnwrappedLineParser::parseObjCProtocolList() {
4049  assert(FormatTok->is(tok::less) && "'<' expected.");
4050  do {
4051    nextToken();
4052    // Early exit in case someone forgot a close angle.
4053    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4054        FormatTok->isObjCAtKeyword(tok::objc_end)) {
4055      return;
4056    }
4057  } while (!eof() && FormatTok->isNot(tok::greater));
4058  nextToken(); // Skip '>'.
4059}
4060
4061void UnwrappedLineParser::parseObjCUntilAtEnd() {
4062  do {
4063    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4064      nextToken();
4065      addUnwrappedLine();
4066      break;
4067    }
4068    if (FormatTok->is(tok::l_brace)) {
4069      parseBlock();
4070      // In ObjC interfaces, nothing should be following the "}".
4071      addUnwrappedLine();
4072    } else if (FormatTok->is(tok::r_brace)) {
4073      // Ignore stray "}". parseStructuralElement doesn't consume them.
4074      nextToken();
4075      addUnwrappedLine();
4076    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4077      nextToken();
4078      parseObjCMethod();
4079    } else {
4080      parseStructuralElement();
4081    }
4082  } while (!eof());
4083}
4084
4085void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4086  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4087         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4088  nextToken();
4089  nextToken(); // interface name
4090
4091  // @interface can be followed by a lightweight generic
4092  // specialization list, then either a base class or a category.
4093  if (FormatTok->is(tok::less))
4094    parseObjCLightweightGenerics();
4095  if (FormatTok->is(tok::colon)) {
4096    nextToken();
4097    nextToken(); // base class name
4098    // The base class can also have lightweight generics applied to it.
4099    if (FormatTok->is(tok::less))
4100      parseObjCLightweightGenerics();
4101  } else if (FormatTok->is(tok::l_paren)) {
4102    // Skip category, if present.
4103    parseParens();
4104  }
4105
4106  if (FormatTok->is(tok::less))
4107    parseObjCProtocolList();
4108
4109  if (FormatTok->is(tok::l_brace)) {
4110    if (Style.BraceWrapping.AfterObjCDeclaration)
4111      addUnwrappedLine();
4112    parseBlock(/*MustBeDeclaration=*/true);
4113  }
4114
4115  // With instance variables, this puts '}' on its own line.  Without instance
4116  // variables, this ends the @interface line.
4117  addUnwrappedLine();
4118
4119  parseObjCUntilAtEnd();
4120}
4121
4122void UnwrappedLineParser::parseObjCLightweightGenerics() {
4123  assert(FormatTok->is(tok::less));
4124  // Unlike protocol lists, generic parameterizations support
4125  // nested angles:
4126  //
4127  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4128  //     NSObject <NSCopying, NSSecureCoding>
4129  //
4130  // so we need to count how many open angles we have left.
4131  unsigned NumOpenAngles = 1;
4132  do {
4133    nextToken();
4134    // Early exit in case someone forgot a close angle.
4135    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4136        FormatTok->isObjCAtKeyword(tok::objc_end)) {
4137      break;
4138    }
4139    if (FormatTok->is(tok::less)) {
4140      ++NumOpenAngles;
4141    } else if (FormatTok->is(tok::greater)) {
4142      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4143      --NumOpenAngles;
4144    }
4145  } while (!eof() && NumOpenAngles != 0);
4146  nextToken(); // Skip '>'.
4147}
4148
4149// Returns true for the declaration/definition form of @protocol,
4150// false for the expression form.
4151bool UnwrappedLineParser::parseObjCProtocol() {
4152  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4153  nextToken();
4154
4155  if (FormatTok->is(tok::l_paren)) {
4156    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4157    return false;
4158  }
4159
4160  // The definition/declaration form,
4161  // @protocol Foo
4162  // - (int)someMethod;
4163  // @end
4164
4165  nextToken(); // protocol name
4166
4167  if (FormatTok->is(tok::less))
4168    parseObjCProtocolList();
4169
4170  // Check for protocol declaration.
4171  if (FormatTok->is(tok::semi)) {
4172    nextToken();
4173    addUnwrappedLine();
4174    return true;
4175  }
4176
4177  addUnwrappedLine();
4178  parseObjCUntilAtEnd();
4179  return true;
4180}
4181
4182void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4183  bool IsImport = FormatTok->is(Keywords.kw_import);
4184  assert(IsImport || FormatTok->is(tok::kw_export));
4185  nextToken();
4186
4187  // Consume the "default" in "export default class/function".
4188  if (FormatTok->is(tok::kw_default))
4189    nextToken();
4190
4191  // Consume "async function", "function" and "default function", so that these
4192  // get parsed as free-standing JS functions, i.e. do not require a trailing
4193  // semicolon.
4194  if (FormatTok->is(Keywords.kw_async))
4195    nextToken();
4196  if (FormatTok->is(Keywords.kw_function)) {
4197    nextToken();
4198    return;
4199  }
4200
4201  // For imports, `export *`, `export {...}`, consume the rest of the line up
4202  // to the terminating `;`. For everything else, just return and continue
4203  // parsing the structural element, i.e. the declaration or expression for
4204  // `export default`.
4205  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4206      !FormatTok->isStringLiteral() &&
4207      !(FormatTok->is(Keywords.kw_type) &&
4208        Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4209    return;
4210  }
4211
4212  while (!eof()) {
4213    if (FormatTok->is(tok::semi))
4214      return;
4215    if (Line->Tokens.empty()) {
4216      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4217      // import statement should terminate.
4218      return;
4219    }
4220    if (FormatTok->is(tok::l_brace)) {
4221      FormatTok->setBlockKind(BK_Block);
4222      nextToken();
4223      parseBracedList();
4224    } else {
4225      nextToken();
4226    }
4227  }
4228}
4229
4230void UnwrappedLineParser::parseStatementMacro() {
4231  nextToken();
4232  if (FormatTok->is(tok::l_paren))
4233    parseParens();
4234  if (FormatTok->is(tok::semi))
4235    nextToken();
4236  addUnwrappedLine();
4237}
4238
4239void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4240  // consume things like a::`b.c[d:e] or a::*
4241  while (true) {
4242    if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4243                           tok::coloncolon, tok::hash) ||
4244        Keywords.isVerilogIdentifier(*FormatTok)) {
4245      nextToken();
4246    } else if (FormatTok->is(tok::l_square)) {
4247      parseSquare();
4248    } else {
4249      break;
4250    }
4251  }
4252}
4253
4254void UnwrappedLineParser::parseVerilogSensitivityList() {
4255  if (FormatTok->isNot(tok::at))
4256    return;
4257  nextToken();
4258  // A block event expression has 2 at signs.
4259  if (FormatTok->is(tok::at))
4260    nextToken();
4261  switch (FormatTok->Tok.getKind()) {
4262  case tok::star:
4263    nextToken();
4264    break;
4265  case tok::l_paren:
4266    parseParens();
4267    break;
4268  default:
4269    parseVerilogHierarchyIdentifier();
4270    break;
4271  }
4272}
4273
/// Parses the header of a Verilog hierarchy construct: a clocking block, a
/// case-like statement, or anything else such as "module".
///
/// \returns AddLevels, which is 1 for case-like headers when
/// Style.IndentCaseLabels is set and 0 in every other case.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    // "clocking", an optional identifier, an optional sensitivity list and an
    // optional ';'.
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(tok::l_square)) {
        // An identifier directly followed by '[' is marked as a dimensioned
        // type name.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and flags the next one as a
    // continuation.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(Keywords.kw_implements)) {
      NewLine();
      // Comma-separated list of identifiers.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    if (FormatTok->is(tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}
4374
4375void UnwrappedLineParser::parseVerilogTable() {
4376  assert(FormatTok->is(Keywords.kw_table));
4377  nextToken(/*LevelDifference=*/1);
4378  addUnwrappedLine();
4379
4380  auto InitialLevel = Line->Level++;
4381  while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4382    FormatToken *Tok = FormatTok;
4383    nextToken();
4384    if (Tok->is(tok::semi))
4385      addUnwrappedLine();
4386    else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4387      Tok->setFinalizedType(TT_VerilogTableItem);
4388  }
4389  Line->Level = InitialLevel;
4390  nextToken(/*LevelDifference=*/-1);
4391  addUnwrappedLine();
4392}
4393
4394void UnwrappedLineParser::parseVerilogCaseLabel() {
4395  // The label will get unindented in AnnotatingParser. If there are no leading
4396  // spaces, indent the rest here so that things inside the block will be
4397  // indented relative to things outside. We don't use parseLabel because we
4398  // don't know whether this colon is a label or a ternary expression at this
4399  // point.
4400  auto OrigLevel = Line->Level;
4401  auto FirstLine = CurrentLines->size();
4402  if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4403    ++Line->Level;
4404  else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4405    --Line->Level;
4406  parseStructuralElement();
4407  // Restore the indentation in both the new line and the line that has the
4408  // label.
4409  if (CurrentLines->size() > FirstLine)
4410    (*CurrentLines)[FirstLine].Level = OrigLevel;
4411  Line->Level = OrigLevel;
4412}
4413
4414bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4415  for (const auto &N : Line.Tokens) {
4416    if (N.Tok->MacroCtx)
4417      return true;
4418    for (const UnwrappedLine &Child : N.Children)
4419      if (containsExpansion(Child))
4420        return true;
4421  }
4422  return false;
4423}
4424
4425void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4426  if (Line->Tokens.empty())
4427    return;
4428  LLVM_DEBUG({
4429    if (!parsingPPDirective()) {
4430      llvm::dbgs() << "Adding unwrapped line:\n";
4431      printDebugInfo(*Line);
4432    }
4433  });
4434
4435  // If this line closes a block when in Whitesmiths mode, remember that
4436  // information so that the level can be decreased after the line is added.
4437  // This has to happen after the addition of the line since the line itself
4438  // needs to be indented.
4439  bool ClosesWhitesmithsBlock =
4440      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4441      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4442
4443  // If the current line was expanded from a macro call, we use it to
4444  // reconstruct an unwrapped line from the structure of the expanded unwrapped
4445  // line and the unexpanded token stream.
4446  if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4447    if (!Reconstruct)
4448      Reconstruct.emplace(Line->Level, Unexpanded);
4449    Reconstruct->addLine(*Line);
4450
4451    // While the reconstructed unexpanded lines are stored in the normal
4452    // flow of lines, the expanded lines are stored on the side to be analyzed
4453    // in an extra step.
4454    CurrentExpandedLines.push_back(std::move(*Line));
4455
4456    if (Reconstruct->finished()) {
4457      UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4458      assert(!Reconstructed.Tokens.empty() &&
4459             "Reconstructed must at least contain the macro identifier.");
4460      assert(!parsingPPDirective());
4461      LLVM_DEBUG({
4462        llvm::dbgs() << "Adding unexpanded line:\n";
4463        printDebugInfo(Reconstructed);
4464      });
4465      ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4466      Lines.push_back(std::move(Reconstructed));
4467      CurrentExpandedLines.clear();
4468      Reconstruct.reset();
4469    }
4470  } else {
4471    // At the top level we only get here when no unexpansion is going on, or
4472    // when conditional formatting led to unfinished macro reconstructions.
4473    assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4474    CurrentLines->push_back(std::move(*Line));
4475  }
4476  Line->Tokens.clear();
4477  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4478  Line->FirstStartColumn = 0;
4479  Line->IsContinuation = false;
4480  Line->SeenDecltypeAuto = false;
4481
4482  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4483    --Line->Level;
4484  if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4485    CurrentLines->append(
4486        std::make_move_iterator(PreprocessorDirectives.begin()),
4487        std::make_move_iterator(PreprocessorDirectives.end()));
4488    PreprocessorDirectives.clear();
4489  }
4490  // Disconnect the current token from the last token on the previous line.
4491  FormatTok->Previous = nullptr;
4492}
4493
/// Returns true if the current token is the end-of-file token.
bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4495
4496bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4497  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4498         FormatTok.NewlinesBefore > 0;
4499}
4500
4501// Checks if \p FormatTok is a line comment that continues the line comment
4502// section on \p Line.
4503static bool
4504continuesLineCommentSection(const FormatToken &FormatTok,
4505                            const UnwrappedLine &Line,
4506                            const llvm::Regex &CommentPragmasRegex) {
4507  if (Line.Tokens.empty())
4508    return false;
4509
4510  StringRef IndentContent = FormatTok.TokenText;
4511  if (FormatTok.TokenText.starts_with("//") ||
4512      FormatTok.TokenText.starts_with("/*")) {
4513    IndentContent = FormatTok.TokenText.substr(2);
4514  }
4515  if (CommentPragmasRegex.match(IndentContent))
4516    return false;
4517
4518  // If Line starts with a line comment, then FormatTok continues the comment
4519  // section if its original column is greater or equal to the original start
4520  // column of the line.
4521  //
4522  // Define the min column token of a line as follows: if a line ends in '{' or
4523  // contains a '{' followed by a line comment, then the min column token is
4524  // that '{'. Otherwise, the min column token of the line is the first token of
4525  // the line.
4526  //
4527  // If Line starts with a token other than a line comment, then FormatTok
4528  // continues the comment section if its original column is greater than the
4529  // original start column of the min column token of the line.
4530  //
4531  // For example, the second line comment continues the first in these cases:
4532  //
4533  // // first line
4534  // // second line
4535  //
4536  // and:
4537  //
4538  // // first line
4539  //  // second line
4540  //
4541  // and:
4542  //
4543  // int i; // first line
4544  //  // second line
4545  //
4546  // and:
4547  //
4548  // do { // first line
4549  //      // second line
4550  //   int i;
4551  // } while (true);
4552  //
4553  // and:
4554  //
4555  // enum {
4556  //   a, // first line
4557  //    // second line
4558  //   b
4559  // };
4560  //
4561  // The second line comment doesn't continue the first in these cases:
4562  //
4563  //   // first line
4564  //  // second line
4565  //
4566  // and:
4567  //
4568  // int i; // first line
4569  // // second line
4570  //
4571  // and:
4572  //
4573  // do { // first line
4574  //   // second line
4575  //   int i;
4576  // } while (true);
4577  //
4578  // and:
4579  //
4580  // enum {
4581  //   a, // first line
4582  //   // second line
4583  // };
4584  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4585
4586  // Scan for '{//'. If found, use the column of '{' as a min column for line
4587  // comment section continuation.
4588  const FormatToken *PreviousToken = nullptr;
4589  for (const UnwrappedLineNode &Node : Line.Tokens) {
4590    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4591        isLineComment(*Node.Tok)) {
4592      MinColumnToken = PreviousToken;
4593      break;
4594    }
4595    PreviousToken = Node.Tok;
4596
4597    // Grab the last newline preceding a token in this unwrapped line.
4598    if (Node.Tok->NewlinesBefore > 0)
4599      MinColumnToken = Node.Tok;
4600  }
4601  if (PreviousToken && PreviousToken->is(tok::l_brace))
4602    MinColumnToken = PreviousToken;
4603
4604  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4605                              MinColumnToken);
4606}
4607
4608void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4609  bool JustComments = Line->Tokens.empty();
4610  for (FormatToken *Tok : CommentsBeforeNextToken) {
4611    // Line comments that belong to the same line comment section are put on the
4612    // same line since later we might want to reflow content between them.
4613    // Additional fine-grained breaking of line comment sections is controlled
4614    // by the class BreakableLineCommentSection in case it is desirable to keep
4615    // several line comment sections in the same unwrapped line.
4616    //
4617    // FIXME: Consider putting separate line comment sections as children to the
4618    // unwrapped line instead.
4619    Tok->ContinuesLineCommentSection =
4620        continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4621    if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4622      addUnwrappedLine();
4623    pushToken(Tok);
4624  }
4625  if (NewlineBeforeNext && JustComments)
4626    addUnwrappedLine();
4627  CommentsBeforeNextToken.clear();
4628}
4629
4630void UnwrappedLineParser::nextToken(int LevelDifference) {
4631  if (eof())
4632    return;
4633  flushComments(isOnNewLine(*FormatTok));
4634  pushToken(FormatTok);
4635  FormatToken *Previous = FormatTok;
4636  if (!Style.isJavaScript())
4637    readToken(LevelDifference);
4638  else
4639    readTokenWithJavaScriptASI();
4640  FormatTok->Previous = Previous;
4641  if (Style.isVerilog()) {
4642    // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4643    // keywords like `begin`, we can't treat them the same as left braces
4644    // because some contexts require one of them.  For example structs use
4645    // braces and if blocks use keywords, and a left brace can occur in an if
4646    // statement, but it is not a block.  For keywords like `end`, we simply
4647    // treat them the same as right braces.
4648    if (Keywords.isVerilogEnd(*FormatTok))
4649      FormatTok->Tok.setKind(tok::r_brace);
4650  }
4651}
4652
4653void UnwrappedLineParser::distributeComments(
4654    const SmallVectorImpl<FormatToken *> &Comments,
4655    const FormatToken *NextTok) {
4656  // Whether or not a line comment token continues a line is controlled by
4657  // the method continuesLineCommentSection, with the following caveat:
4658  //
4659  // Define a trail of Comments to be a nonempty proper postfix of Comments such
4660  // that each comment line from the trail is aligned with the next token, if
4661  // the next token exists. If a trail exists, the beginning of the maximal
4662  // trail is marked as a start of a new comment section.
4663  //
4664  // For example in this code:
4665  //
4666  // int a; // line about a
4667  //   // line 1 about b
4668  //   // line 2 about b
4669  //   int b;
4670  //
4671  // the two lines about b form a maximal trail, so there are two sections, the
4672  // first one consisting of the single comment "// line about a" and the
4673  // second one consisting of the next two comments.
4674  if (Comments.empty())
4675    return;
4676  bool ShouldPushCommentsInCurrentLine = true;
4677  bool HasTrailAlignedWithNextToken = false;
4678  unsigned StartOfTrailAlignedWithNextToken = 0;
4679  if (NextTok) {
4680    // We are skipping the first element intentionally.
4681    for (unsigned i = Comments.size() - 1; i > 0; --i) {
4682      if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4683        HasTrailAlignedWithNextToken = true;
4684        StartOfTrailAlignedWithNextToken = i;
4685      }
4686    }
4687  }
4688  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4689    FormatToken *FormatTok = Comments[i];
4690    if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4691      FormatTok->ContinuesLineCommentSection = false;
4692    } else {
4693      FormatTok->ContinuesLineCommentSection =
4694          continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4695    }
4696    if (!FormatTok->ContinuesLineCommentSection &&
4697        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4698      ShouldPushCommentsInCurrentLine = false;
4699    }
4700    if (ShouldPushCommentsInCurrentLine)
4701      pushToken(FormatTok);
4702    else
4703      CommentsBeforeNextToken.push_back(FormatTok);
4704  }
4705}
4706
4707void UnwrappedLineParser::readToken(int LevelDifference) {
4708  SmallVector<FormatToken *, 1> Comments;
4709  bool PreviousWasComment = false;
4710  bool FirstNonCommentOnLine = false;
4711  do {
4712    FormatTok = Tokens->getNextToken();
4713    assert(FormatTok);
4714    while (FormatTok->getType() == TT_ConflictStart ||
4715           FormatTok->getType() == TT_ConflictEnd ||
4716           FormatTok->getType() == TT_ConflictAlternative) {
4717      if (FormatTok->getType() == TT_ConflictStart)
4718        conditionalCompilationStart(/*Unreachable=*/false);
4719      else if (FormatTok->getType() == TT_ConflictAlternative)
4720        conditionalCompilationAlternative();
4721      else if (FormatTok->getType() == TT_ConflictEnd)
4722        conditionalCompilationEnd();
4723      FormatTok = Tokens->getNextToken();
4724      FormatTok->MustBreakBefore = true;
4725      FormatTok->MustBreakBeforeFinalized = true;
4726    }
4727
4728    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4729                                      const FormatToken &Tok,
4730                                      bool PreviousWasComment) {
4731      auto IsFirstOnLine = [](const FormatToken &Tok) {
4732        return Tok.HasUnescapedNewline || Tok.IsFirst;
4733      };
4734
4735      // Consider preprocessor directives preceded by block comments as first
4736      // on line.
4737      if (PreviousWasComment)
4738        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4739      return IsFirstOnLine(Tok);
4740    };
4741
4742    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4743        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4744    PreviousWasComment = FormatTok->is(tok::comment);
4745
4746    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4747           (!Style.isVerilog() ||
4748            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4749           FirstNonCommentOnLine) {
4750      distributeComments(Comments, FormatTok);
4751      Comments.clear();
4752      // If there is an unfinished unwrapped line, we flush the preprocessor
4753      // directives only after that unwrapped line was finished later.
4754      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4755      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4756      assert((LevelDifference >= 0 ||
4757              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4758             "LevelDifference makes Line->Level negative");
4759      Line->Level += LevelDifference;
4760      // Comments stored before the preprocessor directive need to be output
4761      // before the preprocessor directive, at the same level as the
4762      // preprocessor directive, as we consider them to apply to the directive.
4763      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4764          PPBranchLevel > 0) {
4765        Line->Level += PPBranchLevel;
4766      }
4767      flushComments(isOnNewLine(*FormatTok));
4768      parsePPDirective();
4769      PreviousWasComment = FormatTok->is(tok::comment);
4770      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4771          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4772    }
4773
4774    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4775        !Line->InPPDirective) {
4776      continue;
4777    }
4778
4779    if (FormatTok->is(tok::identifier) &&
4780        Macros.defined(FormatTok->TokenText) &&
4781        // FIXME: Allow expanding macros in preprocessor directives.
4782        !Line->InPPDirective) {
4783      FormatToken *ID = FormatTok;
4784      unsigned Position = Tokens->getPosition();
4785
4786      // To correctly parse the code, we need to replace the tokens of the macro
4787      // call with its expansion.
4788      auto PreCall = std::move(Line);
4789      Line.reset(new UnwrappedLine);
4790      bool OldInExpansion = InExpansion;
4791      InExpansion = true;
4792      // We parse the macro call into a new line.
4793      auto Args = parseMacroCall();
4794      InExpansion = OldInExpansion;
4795      assert(Line->Tokens.front().Tok == ID);
4796      // And remember the unexpanded macro call tokens.
4797      auto UnexpandedLine = std::move(Line);
4798      // Reset to the old line.
4799      Line = std::move(PreCall);
4800
4801      LLVM_DEBUG({
4802        llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4803        if (Args) {
4804          llvm::dbgs() << "(";
4805          for (const auto &Arg : Args.value())
4806            for (const auto &T : Arg)
4807              llvm::dbgs() << T->TokenText << " ";
4808          llvm::dbgs() << ")";
4809        }
4810        llvm::dbgs() << "\n";
4811      });
4812      if (Macros.objectLike(ID->TokenText) && Args &&
4813          !Macros.hasArity(ID->TokenText, Args->size())) {
4814        // The macro is either
4815        // - object-like, but we got argumnets, or
4816        // - overloaded to be both object-like and function-like, but none of
4817        //   the function-like arities match the number of arguments.
4818        // Thus, expand as object-like macro.
4819        LLVM_DEBUG(llvm::dbgs()
4820                   << "Macro \"" << ID->TokenText
4821                   << "\" not overloaded for arity " << Args->size()
4822                   << "or not function-like, using object-like overload.");
4823        Args.reset();
4824        UnexpandedLine->Tokens.resize(1);
4825        Tokens->setPosition(Position);
4826        nextToken();
4827        assert(!Args && Macros.objectLike(ID->TokenText));
4828      }
4829      if ((!Args && Macros.objectLike(ID->TokenText)) ||
4830          (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4831        // Next, we insert the expanded tokens in the token stream at the
4832        // current position, and continue parsing.
4833        Unexpanded[ID] = std::move(UnexpandedLine);
4834        SmallVector<FormatToken *, 8> Expansion =
4835            Macros.expand(ID, std::move(Args));
4836        if (!Expansion.empty())
4837          FormatTok = Tokens->insertTokens(Expansion);
4838
4839        LLVM_DEBUG({
4840          llvm::dbgs() << "Expanded: ";
4841          for (const auto &T : Expansion)
4842            llvm::dbgs() << T->TokenText << " ";
4843          llvm::dbgs() << "\n";
4844        });
4845      } else {
4846        LLVM_DEBUG({
4847          llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4848                       << "\", because it was used ";
4849          if (Args)
4850            llvm::dbgs() << "with " << Args->size();
4851          else
4852            llvm::dbgs() << "without";
4853          llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4854        });
4855        Tokens->setPosition(Position);
4856        FormatTok = ID;
4857      }
4858    }
4859
4860    if (FormatTok->isNot(tok::comment)) {
4861      distributeComments(Comments, FormatTok);
4862      Comments.clear();
4863      return;
4864    }
4865
4866    Comments.push_back(FormatTok);
4867  } while (!eof());
4868
4869  distributeComments(Comments, nullptr);
4870  Comments.clear();
4871}
4872
4873namespace {
4874template <typename Iterator>
4875void pushTokens(Iterator Begin, Iterator End,
4876                llvm::SmallVectorImpl<FormatToken *> &Into) {
4877  for (auto I = Begin; I != End; ++I) {
4878    Into.push_back(I->Tok);
4879    for (const auto &Child : I->Children)
4880      pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4881  }
4882}
4883} // namespace
4884
4885std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
4886UnwrappedLineParser::parseMacroCall() {
4887  std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4888  assert(Line->Tokens.empty());
4889  nextToken();
4890  if (FormatTok->isNot(tok::l_paren))
4891    return Args;
4892  unsigned Position = Tokens->getPosition();
4893  FormatToken *Tok = FormatTok;
4894  nextToken();
4895  Args.emplace();
4896  auto ArgStart = std::prev(Line->Tokens.end());
4897
4898  int Parens = 0;
4899  do {
4900    switch (FormatTok->Tok.getKind()) {
4901    case tok::l_paren:
4902      ++Parens;
4903      nextToken();
4904      break;
4905    case tok::r_paren: {
4906      if (Parens > 0) {
4907        --Parens;
4908        nextToken();
4909        break;
4910      }
4911      Args->push_back({});
4912      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4913      nextToken();
4914      return Args;
4915    }
4916    case tok::comma: {
4917      if (Parens > 0) {
4918        nextToken();
4919        break;
4920      }
4921      Args->push_back({});
4922      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4923      nextToken();
4924      ArgStart = std::prev(Line->Tokens.end());
4925      break;
4926    }
4927    default:
4928      nextToken();
4929      break;
4930    }
4931  } while (!eof());
4932  Line->Tokens.resize(1);
4933  Tokens->setPosition(Position);
4934  FormatTok = Tok;
4935  return {};
4936}
4937
4938void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4939  Line->Tokens.push_back(UnwrappedLineNode(Tok));
4940  if (MustBreakBeforeNextToken) {
4941    Line->Tokens.back().Tok->MustBreakBefore = true;
4942    Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
4943    MustBreakBeforeNextToken = false;
4944  }
4945}
4946
4947} // end namespace format
4948} // end namespace clang
4949