1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file contains the implementation of the UnwrappedLineParser,
11/// which turns a stream of tokens into UnwrappedLines.
12///
13//===----------------------------------------------------------------------===//
14
15#include "UnwrappedLineParser.h"
16#include "FormatToken.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/Support/Debug.h"
19#include "llvm/Support/raw_ostream.h"
20
21#include <algorithm>
22
23#define DEBUG_TYPE "format-parser"
24
25namespace clang {
26namespace format {
27
28class FormatTokenSource {
29public:
30  virtual ~FormatTokenSource() {}
31  virtual FormatToken *getNextToken() = 0;
32
33  virtual unsigned getPosition() = 0;
34  virtual FormatToken *setPosition(unsigned Position) = 0;
35};
36
37namespace {
38
39class ScopedDeclarationState {
40public:
41  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                         bool MustBeDeclaration)
43      : Line(Line), Stack(Stack) {
44    Line.MustBeDeclaration = MustBeDeclaration;
45    Stack.push_back(MustBeDeclaration);
46  }
47  ~ScopedDeclarationState() {
48    Stack.pop_back();
49    if (!Stack.empty())
50      Line.MustBeDeclaration = Stack.back();
51    else
52      Line.MustBeDeclaration = true;
53  }
54
55private:
56  UnwrappedLine &Line;
57  std::vector<bool> &Stack;
58};
59
60static bool isLineComment(const FormatToken &FormatTok) {
61  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62}
63
64// Checks if \p FormatTok is a line comment that continues the line comment
65// \p Previous. The original column of \p MinColumnToken is used to determine
66// whether \p FormatTok is indented enough to the right to continue \p Previous.
67static bool continuesLineComment(const FormatToken &FormatTok,
68                                 const FormatToken *Previous,
69                                 const FormatToken *MinColumnToken) {
70  if (!Previous || !MinColumnToken)
71    return false;
72  unsigned MinContinueColumn =
73      MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75         isLineComment(*Previous) &&
76         FormatTok.OriginalColumn >= MinContinueColumn;
77}
78
79class ScopedMacroState : public FormatTokenSource {
80public:
81  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                   FormatToken *&ResetToken)
83      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85        Token(nullptr), PreviousToken(nullptr) {
86    FakeEOF.Tok.startToken();
87    FakeEOF.Tok.setKind(tok::eof);
88    TokenSource = this;
89    Line.Level = 0;
90    Line.InPPDirective = true;
91  }
92
93  ~ScopedMacroState() override {
94    TokenSource = PreviousTokenSource;
95    ResetToken = Token;
96    Line.InPPDirective = false;
97    Line.Level = PreviousLineLevel;
98  }
99
100  FormatToken *getNextToken() override {
101    // The \c UnwrappedLineParser guards against this by never calling
102    // \c getNextToken() after it has encountered the first eof token.
103    assert(!eof());
104    PreviousToken = Token;
105    Token = PreviousTokenSource->getNextToken();
106    if (eof())
107      return &FakeEOF;
108    return Token;
109  }
110
111  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112
113  FormatToken *setPosition(unsigned Position) override {
114    PreviousToken = nullptr;
115    Token = PreviousTokenSource->setPosition(Position);
116    return Token;
117  }
118
119private:
120  bool eof() {
121    return Token && Token->HasUnescapedNewline &&
122           !continuesLineComment(*Token, PreviousToken,
123                                 /*MinColumnToken=*/PreviousToken);
124  }
125
126  FormatToken FakeEOF;
127  UnwrappedLine &Line;
128  FormatTokenSource *&TokenSource;
129  FormatToken *&ResetToken;
130  unsigned PreviousLineLevel;
131  FormatTokenSource *PreviousTokenSource;
132
133  FormatToken *Token;
134  FormatToken *PreviousToken;
135};
136
137} // end anonymous namespace
138
139class ScopedLineState {
140public:
141  ScopedLineState(UnwrappedLineParser &Parser,
142                  bool SwitchToPreprocessorLines = false)
143      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144    if (SwitchToPreprocessorLines)
145      Parser.CurrentLines = &Parser.PreprocessorDirectives;
146    else if (!Parser.Line->Tokens.empty())
147      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148    PreBlockLine = std::move(Parser.Line);
149    Parser.Line = std::make_unique<UnwrappedLine>();
150    Parser.Line->Level = PreBlockLine->Level;
151    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152  }
153
154  ~ScopedLineState() {
155    if (!Parser.Line->Tokens.empty()) {
156      Parser.addUnwrappedLine();
157    }
158    assert(Parser.Line->Tokens.empty());
159    Parser.Line = std::move(PreBlockLine);
160    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161      Parser.MustBreakBeforeNextToken = true;
162    Parser.CurrentLines = OriginalLines;
163  }
164
165private:
166  UnwrappedLineParser &Parser;
167
168  std::unique_ptr<UnwrappedLine> PreBlockLine;
169  SmallVectorImpl<UnwrappedLine> *OriginalLines;
170};
171
172class CompoundStatementIndenter {
173public:
174  CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                            const FormatStyle &Style, unsigned &LineLevel)
176      : CompoundStatementIndenter(Parser, LineLevel,
177                                  Style.BraceWrapping.AfterControlStatement,
178                                  Style.BraceWrapping.IndentBraces) {}
179  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                            bool WrapBrace, bool IndentBrace)
181      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182    if (WrapBrace)
183      Parser->addUnwrappedLine();
184    if (IndentBrace)
185      ++LineLevel;
186  }
187  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188
189private:
190  unsigned &LineLevel;
191  unsigned OldLineLevel;
192};
193
194namespace {
195
196class IndexedTokenSource : public FormatTokenSource {
197public:
198  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199      : Tokens(Tokens), Position(-1) {}
200
201  FormatToken *getNextToken() override {
202    ++Position;
203    return Tokens[Position];
204  }
205
206  unsigned getPosition() override {
207    assert(Position >= 0);
208    return Position;
209  }
210
211  FormatToken *setPosition(unsigned P) override {
212    Position = P;
213    return Tokens[Position];
214  }
215
216  void reset() { Position = -1; }
217
218private:
219  ArrayRef<FormatToken *> Tokens;
220  int Position;
221};
222
223} // end anonymous namespace
224
225UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                         const AdditionalKeywords &Keywords,
227                                         unsigned FirstStartColumn,
228                                         ArrayRef<FormatToken *> Tokens,
229                                         UnwrappedLineConsumer &Callback)
230    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                       ? IG_Rejected
236                       : IG_Inited),
237      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238
239void UnwrappedLineParser::reset() {
240  PPBranchLevel = -1;
241  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                     ? IG_Rejected
243                     : IG_Inited;
244  IncludeGuardToken = nullptr;
245  Line.reset(new UnwrappedLine);
246  CommentsBeforeNextToken.clear();
247  FormatTok = nullptr;
248  MustBreakBeforeNextToken = false;
249  PreprocessorDirectives.clear();
250  CurrentLines = &Lines;
251  DeclarationScopeStack.clear();
252  PPStack.clear();
253  Line->FirstStartColumn = FirstStartColumn;
254}
255
256void UnwrappedLineParser::parse() {
257  IndexedTokenSource TokenSource(AllTokens);
258  Line->FirstStartColumn = FirstStartColumn;
259  do {
260    LLVM_DEBUG(llvm::dbgs() << "----\n");
261    reset();
262    Tokens = &TokenSource;
263    TokenSource.reset();
264
265    readToken();
266    parseFile();
267
268    // If we found an include guard then all preprocessor directives (other than
269    // the guard) are over-indented by one.
270    if (IncludeGuard == IG_Found)
271      for (auto &Line : Lines)
272        if (Line.InPPDirective && Line.Level > 0)
273          --Line.Level;
274
275    // Create line with eof token.
276    pushToken(FormatTok);
277    addUnwrappedLine();
278
279    for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                  E = Lines.end();
281         I != E; ++I) {
282      Callback.consumeUnwrappedLine(*I);
283    }
284    Callback.finishRun();
285    Lines.clear();
286    while (!PPLevelBranchIndex.empty() &&
287           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290    }
291    if (!PPLevelBranchIndex.empty()) {
292      ++PPLevelBranchIndex.back();
293      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295    }
296  } while (!PPLevelBranchIndex.empty());
297}
298
299void UnwrappedLineParser::parseFile() {
300  // The top-level context in a file always has declarations, except for pre-
301  // processor directives and JavaScript files.
302  bool MustBeDeclaration =
303      !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                          MustBeDeclaration);
306  if (Style.Language == FormatStyle::LK_TextProto)
307    parseBracedList();
308  else
309    parseLevel(/*HasOpeningBrace=*/false);
310  // Make sure to format the remaining tokens.
311  //
312  // LK_TextProto is special since its top-level is parsed as the body of a
313  // braced list, which does not necessarily have natural line separators such
314  // as a semicolon. Comments after the last entry that have been determined to
315  // not belong to that line, as in:
316  //   key: value
317  //   // endfile comment
318  // do not have a chance to be put on a line of their own until this point.
319  // Here we add this newline before end-of-file comments.
320  if (Style.Language == FormatStyle::LK_TextProto &&
321      !CommentsBeforeNextToken.empty())
322    addUnwrappedLine();
323  flushComments(true);
324  addUnwrappedLine();
325}
326
327void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328  do {
329    switch (FormatTok->Tok.getKind()) {
330    case tok::l_brace:
331      return;
332    default:
333      if (FormatTok->is(Keywords.kw_where)) {
334        addUnwrappedLine();
335        nextToken();
336        parseCSharpGenericTypeConstraint();
337        break;
338      }
339      nextToken();
340      break;
341    }
342  } while (!eof());
343}
344
345void UnwrappedLineParser::parseCSharpAttribute() {
346  int UnpairedSquareBrackets = 1;
347  do {
348    switch (FormatTok->Tok.getKind()) {
349    case tok::r_square:
350      nextToken();
351      --UnpairedSquareBrackets;
352      if (UnpairedSquareBrackets == 0) {
353        addUnwrappedLine();
354        return;
355      }
356      break;
357    case tok::l_square:
358      ++UnpairedSquareBrackets;
359      nextToken();
360      break;
361    default:
362      nextToken();
363      break;
364    }
365  } while (!eof());
366}
367
/// Parses a sequence of structural elements until eof() is reached.
///
/// \param HasOpeningBrace When true, the function returns as soon as a closing
/// brace (or a TT_MacroBlockEnd token) is seen, leaving that token
/// unconsumed. When false, such a token is consumed and ends the current
/// unwrapped line.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Set once the first case/default label on this level has been handled, so
  // that the level is bumped for labels at most once.
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->getType() == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // On success, `continue` jumps straight to the loop condition.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past any comments to the token following 'default', then
      // rewind the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Indent once for the first label when the style asks for it, or when
      // inside a preprocessor directive at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
439
440void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441  // We'll parse forward through the tokens until we hit
442  // a closing brace or eof - note that getNextToken() will
443  // parse macros, so this will magically work inside macro
444  // definitions, too.
445  unsigned StoredPosition = Tokens->getPosition();
446  FormatToken *Tok = FormatTok;
447  const FormatToken *PrevTok = Tok->Previous;
448  // Keep a stack of positions of lbrace tokens. We will
449  // update information about whether an lbrace starts a
450  // braced init list or a different block during the loop.
451  SmallVector<FormatToken *, 8> LBraceStack;
452  assert(Tok->Tok.is(tok::l_brace));
453  do {
454    // Get next non-comment token.
455    FormatToken *NextTok;
456    unsigned ReadTokens = 0;
457    do {
458      NextTok = Tokens->getNextToken();
459      ++ReadTokens;
460    } while (NextTok->is(tok::comment));
461
462    switch (Tok->Tok.getKind()) {
463    case tok::l_brace:
464      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465        if (PrevTok->isOneOf(tok::colon, tok::less))
466          // A ':' indicates this code is in a type, or a braced list
467          // following a label in an object literal ({a: {b: 1}}).
468          // A '<' could be an object used in a comparison, but that is nonsense
469          // code (can never return true), so more likely it is a generic type
470          // argument (`X<{a: string; b: number}>`).
471          // The code below could be confused by semicolons between the
472          // individual members in a type member list, which would normally
473          // trigger BK_Block. In both cases, this must be parsed as an inline
474          // braced init.
475          Tok->BlockKind = BK_BracedInit;
476        else if (PrevTok->is(tok::r_paren))
477          // `) { }` can only occur in function or method declarations in JS.
478          Tok->BlockKind = BK_Block;
479      } else {
480        Tok->BlockKind = BK_Unknown;
481      }
482      LBraceStack.push_back(Tok);
483      break;
484    case tok::r_brace:
485      if (LBraceStack.empty())
486        break;
487      if (LBraceStack.back()->BlockKind == BK_Unknown) {
488        bool ProbablyBracedList = false;
489        if (Style.Language == FormatStyle::LK_Proto) {
490          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491        } else {
492          // Using OriginalColumn to distinguish between ObjC methods and
493          // binary operators is a bit hacky.
494          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495                                  NextTok->OriginalColumn == 0;
496
497          // If there is a comma, semicolon or right paren after the closing
498          // brace, we assume this is a braced initializer list.  Note that
499          // regardless how we mark inner braces here, we will overwrite the
500          // BlockKind later if we parse a braced list (where all blocks
501          // inside are by default braced lists), or when we explicitly detect
502          // blocks (for example while parsing lambdas).
503          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504          // braced list in JS.
505          ProbablyBracedList =
506              (Style.Language == FormatStyle::LK_JavaScript &&
507               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508                                Keywords.kw_as)) ||
509              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511                               tok::r_paren, tok::r_square, tok::l_brace,
512                               tok::ellipsis) ||
513              (NextTok->is(tok::identifier) &&
514               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515              (NextTok->is(tok::semi) &&
516               (!ExpectClassBody || LBraceStack.size() != 1)) ||
517              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519            // We can have an array subscript after a braced init
520            // list, but C++11 attributes are expected after blocks.
521            NextTok = Tokens->getNextToken();
522            ++ReadTokens;
523            ProbablyBracedList = NextTok->isNot(tok::l_square);
524          }
525        }
526        if (ProbablyBracedList) {
527          Tok->BlockKind = BK_BracedInit;
528          LBraceStack.back()->BlockKind = BK_BracedInit;
529        } else {
530          Tok->BlockKind = BK_Block;
531          LBraceStack.back()->BlockKind = BK_Block;
532        }
533      }
534      LBraceStack.pop_back();
535      break;
536    case tok::identifier:
537      if (!Tok->is(TT_StatementMacro))
538        break;
539      LLVM_FALLTHROUGH;
540    case tok::at:
541    case tok::semi:
542    case tok::kw_if:
543    case tok::kw_while:
544    case tok::kw_for:
545    case tok::kw_switch:
546    case tok::kw_try:
547    case tok::kw___try:
548      if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
549        LBraceStack.back()->BlockKind = BK_Block;
550      break;
551    default:
552      break;
553    }
554    PrevTok = Tok;
555    Tok = NextTok;
556  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557
558  // Assume other blocks for all unclosed opening braces.
559  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560    if (LBraceStack[i]->BlockKind == BK_Unknown)
561      LBraceStack[i]->BlockKind = BK_Block;
562  }
563
564  FormatTok = Tokens->setPosition(StoredPosition);
565}
566
/// Folds the \c std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572
573size_t UnwrappedLineParser::computePPHash() const {
574  size_t h = 0;
575  for (const auto &i : PPStack) {
576    hash_combine(h, size_t(i.Kind));
577    hash_combine(h, i.Line);
578  }
579  return h;
580}
581
582void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
583                                     bool MunchSemi) {
584  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
585         "'{' or macro block token expected");
586  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
587  FormatTok->BlockKind = BK_Block;
588
589  size_t PPStartHash = computePPHash();
590
591  unsigned InitialLevel = Line->Level;
592  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
593
594  if (MacroBlock && FormatTok->is(tok::l_paren))
595    parseParens();
596
597  size_t NbPreprocessorDirectives =
598      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
599  addUnwrappedLine();
600  size_t OpeningLineIndex =
601      CurrentLines->empty()
602          ? (UnwrappedLine::kInvalidIndex)
603          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
604
605  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
606                                          MustBeDeclaration);
607  if (AddLevel)
608    ++Line->Level;
609  parseLevel(/*HasOpeningBrace=*/true);
610
611  if (eof())
612    return;
613
614  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
615                 : !FormatTok->is(tok::r_brace)) {
616    Line->Level = InitialLevel;
617    FormatTok->BlockKind = BK_Block;
618    return;
619  }
620
621  size_t PPEndHash = computePPHash();
622
623  // Munch the closing brace.
624  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
625
626  if (MacroBlock && FormatTok->is(tok::l_paren))
627    parseParens();
628
629  if (MunchSemi && FormatTok->Tok.is(tok::semi))
630    nextToken();
631  Line->Level = InitialLevel;
632
633  if (PPStartHash == PPEndHash) {
634    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
635    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
636      // Update the opening line to add the forward reference as well
637      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
638          CurrentLines->size() - 1;
639    }
640  }
641}
642
643static bool isGoogScope(const UnwrappedLine &Line) {
644  // FIXME: Closure-library specific stuff should not be hard-coded but be
645  // configurable.
646  if (Line.Tokens.size() < 4)
647    return false;
648  auto I = Line.Tokens.begin();
649  if (I->Tok->TokenText != "goog")
650    return false;
651  ++I;
652  if (I->Tok->isNot(tok::period))
653    return false;
654  ++I;
655  if (I->Tok->TokenText != "scope")
656    return false;
657  ++I;
658  return I->Tok->is(tok::l_paren);
659}
660
661static bool isIIFE(const UnwrappedLine &Line,
662                   const AdditionalKeywords &Keywords) {
663  // Look for the start of an immediately invoked anonymous function.
664  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
665  // This is commonly done in JavaScript to create a new, anonymous scope.
666  // Example: (function() { ... })()
667  if (Line.Tokens.size() < 3)
668    return false;
669  auto I = Line.Tokens.begin();
670  if (I->Tok->isNot(tok::l_paren))
671    return false;
672  ++I;
673  if (I->Tok->isNot(Keywords.kw_function))
674    return false;
675  ++I;
676  return I->Tok->is(tok::l_paren);
677}
678
679static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
680                                   const FormatToken &InitialToken) {
681  if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
682    return Style.BraceWrapping.AfterNamespace;
683  if (InitialToken.is(tok::kw_class))
684    return Style.BraceWrapping.AfterClass;
685  if (InitialToken.is(tok::kw_union))
686    return Style.BraceWrapping.AfterUnion;
687  if (InitialToken.is(tok::kw_struct))
688    return Style.BraceWrapping.AfterStruct;
689  return false;
690}
691
692void UnwrappedLineParser::parseChildBlock() {
693  FormatTok->BlockKind = BK_Block;
694  nextToken();
695  {
696    bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
697                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
698    ScopedLineState LineState(*this);
699    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
700                                            /*MustBeDeclaration=*/false);
701    Line->Level += SkipIndent ? 0 : 1;
702    parseLevel(/*HasOpeningBrace=*/true);
703    flushComments(isOnNewLine(*FormatTok));
704    Line->Level -= SkipIndent ? 0 : 1;
705  }
706  nextToken();
707}
708
709void UnwrappedLineParser::parsePPDirective() {
710  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
711  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
712
713  nextToken();
714
715  if (!FormatTok->Tok.getIdentifierInfo()) {
716    parsePPUnknown();
717    return;
718  }
719
720  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
721  case tok::pp_define:
722    parsePPDefine();
723    return;
724  case tok::pp_if:
725    parsePPIf(/*IfDef=*/false);
726    break;
727  case tok::pp_ifdef:
728  case tok::pp_ifndef:
729    parsePPIf(/*IfDef=*/true);
730    break;
731  case tok::pp_else:
732    parsePPElse();
733    break;
734  case tok::pp_elif:
735    parsePPElIf();
736    break;
737  case tok::pp_endif:
738    parsePPEndIf();
739    break;
740  default:
741    parsePPUnknown();
742    break;
743  }
744}
745
746void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
747  size_t Line = CurrentLines->size();
748  if (CurrentLines == &PreprocessorDirectives)
749    Line += Lines.size();
750
751  if (Unreachable ||
752      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
753    PPStack.push_back({PP_Unreachable, Line});
754  else
755    PPStack.push_back({PP_Conditional, Line});
756}
757
758void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
759  ++PPBranchLevel;
760  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
761  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
762    PPLevelBranchIndex.push_back(0);
763    PPLevelBranchCount.push_back(0);
764  }
765  PPChainBranchIndex.push(0);
766  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
767  conditionalCompilationCondition(Unreachable || Skip);
768}
769
770void UnwrappedLineParser::conditionalCompilationAlternative() {
771  if (!PPStack.empty())
772    PPStack.pop_back();
773  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
774  if (!PPChainBranchIndex.empty())
775    ++PPChainBranchIndex.top();
776  conditionalCompilationCondition(
777      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
778      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
779}
780
781void UnwrappedLineParser::conditionalCompilationEnd() {
782  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
783  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
784    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
785      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
786    }
787  }
788  // Guard against #endif's without #if.
789  if (PPBranchLevel > -1)
790    --PPBranchLevel;
791  if (!PPChainBranchIndex.empty())
792    PPChainBranchIndex.pop();
793  if (!PPStack.empty())
794    PPStack.pop_back();
795}
796
797void UnwrappedLineParser::parsePPIf(bool IfDef) {
798  bool IfNDef = FormatTok->is(tok::pp_ifndef);
799  nextToken();
800  bool Unreachable = false;
801  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
802    Unreachable = true;
803  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
804    Unreachable = true;
805  conditionalCompilationStart(Unreachable);
806  FormatToken *IfCondition = FormatTok;
807  // If there's a #ifndef on the first line, and the only lines before it are
808  // comments, it could be an include guard.
809  bool MaybeIncludeGuard = IfNDef;
810  if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
811    for (auto &Line : Lines) {
812      if (!Line.Tokens.front().Tok->is(tok::comment)) {
813        MaybeIncludeGuard = false;
814        IncludeGuard = IG_Rejected;
815        break;
816      }
817    }
818  --PPBranchLevel;
819  parsePPUnknown();
820  ++PPBranchLevel;
821  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
822    IncludeGuard = IG_IfNdefed;
823    IncludeGuardToken = IfCondition;
824  }
825}
826
827void UnwrappedLineParser::parsePPElse() {
828  // If a potential include guard has an #else, it's not an include guard.
829  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
830    IncludeGuard = IG_Rejected;
831  conditionalCompilationAlternative();
832  if (PPBranchLevel > -1)
833    --PPBranchLevel;
834  parsePPUnknown();
835  ++PPBranchLevel;
836}
837
838void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
839
840void UnwrappedLineParser::parsePPEndIf() {
841  conditionalCompilationEnd();
842  parsePPUnknown();
843  // If the #endif of a potential include guard is the last thing in the file,
844  // then we found an include guard.
845  unsigned TokenPosition = Tokens->getPosition();
846  FormatToken *PeekNext = AllTokens[TokenPosition];
847  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
848      PeekNext->is(tok::eof) &&
849      Style.IndentPPDirectives != FormatStyle::PPDIS_None)
850    IncludeGuard = IG_Found;
851}
852
853void UnwrappedLineParser::parsePPDefine() {
854  nextToken();
855
856  if (!FormatTok->Tok.getIdentifierInfo()) {
857    IncludeGuard = IG_Rejected;
858    IncludeGuardToken = nullptr;
859    parsePPUnknown();
860    return;
861  }
862
863  if (IncludeGuard == IG_IfNdefed &&
864      IncludeGuardToken->TokenText == FormatTok->TokenText) {
865    IncludeGuard = IG_Defined;
866    IncludeGuardToken = nullptr;
867    for (auto &Line : Lines) {
868      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
869        IncludeGuard = IG_Rejected;
870        break;
871      }
872    }
873  }
874
875  nextToken();
876  if (FormatTok->Tok.getKind() == tok::l_paren &&
877      FormatTok->WhitespaceRange.getBegin() ==
878          FormatTok->WhitespaceRange.getEnd()) {
879    parseParens();
880  }
881  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
882    Line->Level += PPBranchLevel + 1;
883  addUnwrappedLine();
884  ++Line->Level;
885
886  // Errors during a preprocessor directive can only affect the layout of the
887  // preprocessor directive, and thus we ignore them. An alternative approach
888  // would be to use the same approach we use on the file level (no
889  // re-indentation if there was a structural error) within the macro
890  // definition.
891  parseFile();
892}
893
894void UnwrappedLineParser::parsePPUnknown() {
895  do {
896    nextToken();
897  } while (!eof());
898  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
899    Line->Level += PPBranchLevel + 1;
900  addUnwrappedLine();
901}
902
903// Here we exclude certain tokens that are not usually the first token in an
904// unwrapped line. This is used in attempt to distinguish macro calls without
905// trailing semicolons from other constructs split to several lines.
906static bool tokenCanStartNewLine(const FormatToken &Tok) {
907  // Semicolon can be a null-statement, l_square can be a start of a macro or
908  // a C++11 attribute, but this doesn't seem to be common.
909  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
910         Tok.isNot(TT_AttributeSquare) &&
911         // Tokens that can only be used as binary operators and a part of
912         // overloaded operator names.
913         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
914         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
915         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
916         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
917         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
918         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
919         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
920         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
921         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
922         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
923         Tok.isNot(tok::lesslessequal) &&
924         // Colon is used in labels, base class lists, initializer lists,
925         // range-based for loops, ternary operator, but should never be the
926         // first token in an unwrapped line.
927         Tok.isNot(tok::colon) &&
928         // 'noexcept' is a trailing annotation.
929         Tok.isNot(tok::kw_noexcept);
930}
931
932static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
933                          const FormatToken *FormatTok) {
934  // FIXME: This returns true for C/C++ keywords like 'struct'.
935  return FormatTok->is(tok::identifier) &&
936         (FormatTok->Tok.getIdentifierInfo() == nullptr ||
937          !FormatTok->isOneOf(
938              Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
939              Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
940              Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
941              Keywords.kw_let, Keywords.kw_var, tok::kw_const,
942              Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
943              Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
944              Keywords.kw_from));
945}
946
947static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
948                                 const FormatToken *FormatTok) {
949  return FormatTok->Tok.isLiteral() ||
950         FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
951         mustBeJSIdent(Keywords, FormatTok);
952}
953
954// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
955// when encountered after a value (see mustBeJSIdentOrValue).
956static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
957                           const FormatToken *FormatTok) {
958  return FormatTok->isOneOf(
959      tok::kw_return, Keywords.kw_yield,
960      // conditionals
961      tok::kw_if, tok::kw_else,
962      // loops
963      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
964      // switch/case
965      tok::kw_switch, tok::kw_case,
966      // exceptions
967      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
968      // declaration
969      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
970      Keywords.kw_async, Keywords.kw_function,
971      // import/export
972      Keywords.kw_import, tok::kw_export);
973}
974
975// readTokenWithJavaScriptASI reads the next token and terminates the current
976// line if JavaScript Automatic Semicolon Insertion must
977// happen between the current token and the next token.
978//
979// This method is conservative - it cannot cover all edge cases of JavaScript,
980// but only aims to correctly handle certain well known cases. It *must not*
981// return true in speculative cases.
982void UnwrappedLineParser::readTokenWithJavaScriptASI() {
983  FormatToken *Previous = FormatTok;
984  readToken();
985  FormatToken *Next = FormatTok;
986
987  bool IsOnSameLine =
988      CommentsBeforeNextToken.empty()
989          ? Next->NewlinesBefore == 0
990          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
991  if (IsOnSameLine)
992    return;
993
994  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
995  bool PreviousStartsTemplateExpr =
996      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
997  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
998    // If the line contains an '@' sign, the previous token might be an
999    // annotation, which can precede another identifier/value.
1000    bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1001                              [](UnwrappedLineNode &LineNode) {
1002                                return LineNode.Tok->is(tok::at);
1003                              }) != Line->Tokens.end();
1004    if (HasAt)
1005      return;
1006  }
1007  if (Next->is(tok::exclaim) && PreviousMustBeValue)
1008    return addUnwrappedLine();
1009  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1010  bool NextEndsTemplateExpr =
1011      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1012  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1013      (PreviousMustBeValue ||
1014       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1015                         tok::minusminus)))
1016    return addUnwrappedLine();
1017  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1018      isJSDeclOrStmt(Keywords, Next))
1019    return addUnwrappedLine();
1020}
1021
/// Parses one structural element starting at the current token.
///
/// The function works in two phases:
///  1. A switch on the leading token hands constructs that have a dedicated
///     parser (namespaces, control statements, labels, extern blocks, ...)
///     over to that parser and returns. Cases that `break` out of the switch
///     continue with phase 2.
///  2. A loop consumes tokens one by one until something ends the element
///     (a ';', a '}', a block, a recognized macro, ...) or eof() is reached.
///     Most terminating paths call addUnwrappedLine() before returning.
///
/// Must not be called with the current token on a '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include token optionally followed by a string literal forms
  // a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the leading token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Walk to the matching '}' (or eof), marking every token in between as
      // Finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers; otherwise they are
    // parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; any other use of 'extern' falls
    // through to the generic loop.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (!Style.IndentExternBlock) {
          // IndentExternBlock compares equal to zero here: both the line
          // break before '{' and the block's indentation level follow
          // BraceWrapping.AfterExternBlock.
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.BraceWrapping.AfterExternBlock);
        } else {
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.IndentExternBlock ==
                         FormatStyle::IEBS_Indent);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // import [public] "file" [;]
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // Without a string literal this returns without emitting a line.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals'/'slots' style keywords followed by ':' form a line of their
    // own; without the ':' parsing continues below with the keyword already
    // consumed.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the element ends or eof() is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // '@{' is parsed as a braced list.
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface'.
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A ';' ends the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol of 'operator<op>' so that it is not
      // dispatched by the cases of this switch.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parenthesized list
      // and a braced child block (presumably a block literal - confirm).
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      // C# generic type constraint ('where ...') inside a declaration: the
      // line collected so far is emitted and the constraint parsed separately.
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          //
          // Peek at the next token, then restore the token source position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The heuristics below apply only when the identifier was the first
      // token of the line (optionally preceded by a single comment).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        // 'identifier:' outside of a declaration context is a label.
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro only if it is followed by a line
        // break, is at least five characters long or function-like, is not
        // changed by upper-casing, and the next token can plausibly start a
        // line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->BlockKind = BK_BracedInit;
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1501
1502bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1503  assert(FormatTok->is(tok::l_brace));
1504  if (!Style.isCSharp())
1505    return false;
1506  // See if it's a property accessor.
1507  if (FormatTok->Previous->isNot(tok::identifier))
1508    return false;
1509
1510  // See if we are inside a property accessor.
1511  //
1512  // Record the current tokenPosition so that we can advance and
1513  // reset the current token. `Next` is not set yet so we need
1514  // another way to advance along the token stream.
1515  unsigned int StoredPosition = Tokens->getPosition();
1516  FormatToken *Tok = Tokens->getNextToken();
1517
1518  // A trivial property accessor is of the form:
1519  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1520  // Track these as they do not require line breaks to be introduced.
1521  bool HasGetOrSet = false;
1522  bool IsTrivialPropertyAccessor = true;
1523  while (!eof()) {
1524    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1525                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1526                     Keywords.kw_set)) {
1527      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1528        HasGetOrSet = true;
1529      Tok = Tokens->getNextToken();
1530      continue;
1531    }
1532    if (Tok->isNot(tok::r_brace))
1533      IsTrivialPropertyAccessor = false;
1534    break;
1535  }
1536
1537  if (!HasGetOrSet) {
1538    Tokens->setPosition(StoredPosition);
1539    return false;
1540  }
1541
1542  // Try to parse the property accessor:
1543  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1544  Tokens->setPosition(StoredPosition);
1545  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1546    addUnwrappedLine();
1547  nextToken();
1548  do {
1549    switch (FormatTok->Tok.getKind()) {
1550    case tok::r_brace:
1551      nextToken();
1552      if (FormatTok->is(tok::equal)) {
1553        while (!eof() && FormatTok->isNot(tok::semi))
1554          nextToken();
1555        nextToken();
1556      }
1557      addUnwrappedLine();
1558      return true;
1559    case tok::l_brace:
1560      ++Line->Level;
1561      parseBlock(/*MustBeDeclaration=*/true);
1562      addUnwrappedLine();
1563      --Line->Level;
1564      break;
1565    case tok::equal:
1566      if (FormatTok->is(TT_JsFatArrow)) {
1567        ++Line->Level;
1568        do {
1569          nextToken();
1570        } while (!eof() && FormatTok->isNot(tok::semi));
1571        nextToken();
1572        addUnwrappedLine();
1573        --Line->Level;
1574        break;
1575      }
1576      nextToken();
1577      break;
1578    default:
1579      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1580          !IsTrivialPropertyAccessor) {
1581        // Non-trivial get/set needs to be on its own line.
1582        addUnwrappedLine();
1583      }
1584      nextToken();
1585    }
1586  } while (!eof());
1587
1588  // Unreachable for well-formed code (paired '{' and '}').
1589  return true;
1590}
1591
1592bool UnwrappedLineParser::tryToParseLambda() {
1593  if (!Style.isCpp()) {
1594    nextToken();
1595    return false;
1596  }
1597  assert(FormatTok->is(tok::l_square));
1598  FormatToken &LSquare = *FormatTok;
1599  if (!tryToParseLambdaIntroducer())
1600    return false;
1601
1602  bool SeenArrow = false;
1603
1604  while (FormatTok->isNot(tok::l_brace)) {
1605    if (FormatTok->isSimpleTypeSpecifier()) {
1606      nextToken();
1607      continue;
1608    }
1609    switch (FormatTok->Tok.getKind()) {
1610    case tok::l_brace:
1611      break;
1612    case tok::l_paren:
1613      parseParens();
1614      break;
1615    case tok::amp:
1616    case tok::star:
1617    case tok::kw_const:
1618    case tok::comma:
1619    case tok::less:
1620    case tok::greater:
1621    case tok::identifier:
1622    case tok::numeric_constant:
1623    case tok::coloncolon:
1624    case tok::kw_class:
1625    case tok::kw_mutable:
1626    case tok::kw_noexcept:
1627    case tok::kw_template:
1628    case tok::kw_typename:
1629      nextToken();
1630      break;
1631    // Specialization of a template with an integer parameter can contain
1632    // arithmetic, logical, comparison and ternary operators.
1633    //
1634    // FIXME: This also accepts sequences of operators that are not in the scope
1635    // of a template argument list.
1636    //
1637    // In a C++ lambda a template type can only occur after an arrow. We use
1638    // this as an heuristic to distinguish between Objective-C expressions
1639    // followed by an `a->b` expression, such as:
1640    // ([obj func:arg] + a->b)
1641    // Otherwise the code below would parse as a lambda.
1642    //
1643    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1644    // explicit template lists: []<bool b = true && false>(U &&u){}
1645    case tok::plus:
1646    case tok::minus:
1647    case tok::exclaim:
1648    case tok::tilde:
1649    case tok::slash:
1650    case tok::percent:
1651    case tok::lessless:
1652    case tok::pipe:
1653    case tok::pipepipe:
1654    case tok::ampamp:
1655    case tok::caret:
1656    case tok::equalequal:
1657    case tok::exclaimequal:
1658    case tok::greaterequal:
1659    case tok::lessequal:
1660    case tok::question:
1661    case tok::colon:
1662    case tok::ellipsis:
1663    case tok::kw_true:
1664    case tok::kw_false:
1665      if (SeenArrow) {
1666        nextToken();
1667        break;
1668      }
1669      return true;
1670    case tok::arrow:
1671      // This might or might not actually be a lambda arrow (this could be an
1672      // ObjC method invocation followed by a dereferencing arrow). We might
1673      // reset this back to TT_Unknown in TokenAnnotator.
1674      FormatTok->setType(TT_LambdaArrow);
1675      SeenArrow = true;
1676      nextToken();
1677      break;
1678    default:
1679      return true;
1680    }
1681  }
1682  FormatTok->setType(TT_LambdaLBrace);
1683  LSquare.setType(TT_LambdaLSquare);
1684  parseChildBlock();
1685  return true;
1686}
1687
1688bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1689  const FormatToken *Previous = FormatTok->Previous;
1690  if (Previous &&
1691      (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1692                         tok::kw_delete, tok::l_square) ||
1693       FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1694       Previous->isSimpleTypeSpecifier())) {
1695    nextToken();
1696    return false;
1697  }
1698  nextToken();
1699  if (FormatTok->is(tok::l_square)) {
1700    return false;
1701  }
1702  parseSquare(/*LambdaIntroducer=*/true);
1703  return true;
1704}
1705
1706void UnwrappedLineParser::tryToParseJSFunction() {
1707  assert(FormatTok->is(Keywords.kw_function) ||
1708         FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1709  if (FormatTok->is(Keywords.kw_async))
1710    nextToken();
1711  // Consume "function".
1712  nextToken();
1713
1714  // Consume * (generator function). Treat it like C++'s overloaded operators.
1715  if (FormatTok->is(tok::star)) {
1716    FormatTok->setType(TT_OverloadedOperator);
1717    nextToken();
1718  }
1719
1720  // Consume function name.
1721  if (FormatTok->is(tok::identifier))
1722    nextToken();
1723
1724  if (FormatTok->isNot(tok::l_paren))
1725    return;
1726
1727  // Parse formal parameter list.
1728  parseParens();
1729
1730  if (FormatTok->is(tok::colon)) {
1731    // Parse a type definition.
1732    nextToken();
1733
1734    // Eat the type declaration. For braced inline object types, balance braces,
1735    // otherwise just parse until finding an l_brace for the function body.
1736    if (FormatTok->is(tok::l_brace))
1737      tryToParseBracedList();
1738    else
1739      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1740        nextToken();
1741  }
1742
1743  if (FormatTok->is(tok::semi))
1744    return;
1745
1746  parseChildBlock();
1747}
1748
1749bool UnwrappedLineParser::tryToParseBracedList() {
1750  if (FormatTok->BlockKind == BK_Unknown)
1751    calculateBraceTypes();
1752  assert(FormatTok->BlockKind != BK_Unknown);
1753  if (FormatTok->BlockKind == BK_Block)
1754    return false;
1755  nextToken();
1756  parseBracedList();
1757  return true;
1758}
1759
1760bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1761                                          bool IsEnum,
1762                                          tok::TokenKind ClosingBraceKind) {
1763  bool HasError = false;
1764
1765  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1766  // replace this by using parseAssigmentExpression() inside.
1767  do {
1768    if (Style.isCSharp()) {
1769      if (FormatTok->is(TT_JsFatArrow)) {
1770        nextToken();
1771        // Fat arrows can be followed by simple expressions or by child blocks
1772        // in curly braces.
1773        if (FormatTok->is(tok::l_brace)) {
1774          parseChildBlock();
1775          continue;
1776        }
1777      }
1778    }
1779    if (Style.Language == FormatStyle::LK_JavaScript) {
1780      if (FormatTok->is(Keywords.kw_function) ||
1781          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1782        tryToParseJSFunction();
1783        continue;
1784      }
1785      if (FormatTok->is(TT_JsFatArrow)) {
1786        nextToken();
1787        // Fat arrows can be followed by simple expressions or by child blocks
1788        // in curly braces.
1789        if (FormatTok->is(tok::l_brace)) {
1790          parseChildBlock();
1791          continue;
1792        }
1793      }
1794      if (FormatTok->is(tok::l_brace)) {
1795        // Could be a method inside of a braced list `{a() { return 1; }}`.
1796        if (tryToParseBracedList())
1797          continue;
1798        parseChildBlock();
1799      }
1800    }
1801    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1802      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1803        addUnwrappedLine();
1804      nextToken();
1805      return !HasError;
1806    }
1807    switch (FormatTok->Tok.getKind()) {
1808    case tok::caret:
1809      nextToken();
1810      if (FormatTok->is(tok::l_brace)) {
1811        parseChildBlock();
1812      }
1813      break;
1814    case tok::l_square:
1815      if (Style.isCSharp())
1816        parseSquare();
1817      else
1818        tryToParseLambda();
1819      break;
1820    case tok::l_paren:
1821      parseParens();
1822      // JavaScript can just have free standing methods and getters/setters in
1823      // object literals. Detect them by a "{" following ")".
1824      if (Style.Language == FormatStyle::LK_JavaScript) {
1825        if (FormatTok->is(tok::l_brace))
1826          parseChildBlock();
1827        break;
1828      }
1829      break;
1830    case tok::l_brace:
1831      // Assume there are no blocks inside a braced init list apart
1832      // from the ones we explicitly parse out (like lambdas).
1833      FormatTok->BlockKind = BK_BracedInit;
1834      nextToken();
1835      parseBracedList();
1836      break;
1837    case tok::less:
1838      if (Style.Language == FormatStyle::LK_Proto) {
1839        nextToken();
1840        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1841                        /*ClosingBraceKind=*/tok::greater);
1842      } else {
1843        nextToken();
1844      }
1845      break;
1846    case tok::semi:
1847      // JavaScript (or more precisely TypeScript) can have semicolons in braced
1848      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1849      // used for error recovery if we have otherwise determined that this is
1850      // a braced list.
1851      if (Style.Language == FormatStyle::LK_JavaScript) {
1852        nextToken();
1853        break;
1854      }
1855      HasError = true;
1856      if (!ContinueOnSemicolons)
1857        return !HasError;
1858      nextToken();
1859      break;
1860    case tok::comma:
1861      nextToken();
1862      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1863        addUnwrappedLine();
1864      break;
1865    default:
1866      nextToken();
1867      break;
1868    }
1869  } while (!eof());
1870  return false;
1871}
1872
1873void UnwrappedLineParser::parseParens() {
1874  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1875  nextToken();
1876  do {
1877    switch (FormatTok->Tok.getKind()) {
1878    case tok::l_paren:
1879      parseParens();
1880      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1881        parseChildBlock();
1882      break;
1883    case tok::r_paren:
1884      nextToken();
1885      return;
1886    case tok::r_brace:
1887      // A "}" inside parenthesis is an error if there wasn't a matching "{".
1888      return;
1889    case tok::l_square:
1890      tryToParseLambda();
1891      break;
1892    case tok::l_brace:
1893      if (!tryToParseBracedList())
1894        parseChildBlock();
1895      break;
1896    case tok::at:
1897      nextToken();
1898      if (FormatTok->Tok.is(tok::l_brace)) {
1899        nextToken();
1900        parseBracedList();
1901      }
1902      break;
1903    case tok::kw_class:
1904      if (Style.Language == FormatStyle::LK_JavaScript)
1905        parseRecord(/*ParseAsExpr=*/true);
1906      else
1907        nextToken();
1908      break;
1909    case tok::identifier:
1910      if (Style.Language == FormatStyle::LK_JavaScript &&
1911          (FormatTok->is(Keywords.kw_function) ||
1912           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1913        tryToParseJSFunction();
1914      else
1915        nextToken();
1916      break;
1917    default:
1918      nextToken();
1919      break;
1920    }
1921  } while (!eof());
1922}
1923
1924void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1925  if (!LambdaIntroducer) {
1926    assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1927    if (tryToParseLambda())
1928      return;
1929  }
1930  do {
1931    switch (FormatTok->Tok.getKind()) {
1932    case tok::l_paren:
1933      parseParens();
1934      break;
1935    case tok::r_square:
1936      nextToken();
1937      return;
1938    case tok::r_brace:
1939      // A "}" inside parenthesis is an error if there wasn't a matching "{".
1940      return;
1941    case tok::l_square:
1942      parseSquare();
1943      break;
1944    case tok::l_brace: {
1945      if (!tryToParseBracedList())
1946        parseChildBlock();
1947      break;
1948    }
1949    case tok::at:
1950      nextToken();
1951      if (FormatTok->Tok.is(tok::l_brace)) {
1952        nextToken();
1953        parseBracedList();
1954      }
1955      break;
1956    default:
1957      nextToken();
1958      break;
1959    }
1960  } while (!eof());
1961}
1962
1963void UnwrappedLineParser::parseIfThenElse() {
1964  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1965  nextToken();
1966  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1967    nextToken();
1968  if (FormatTok->Tok.is(tok::l_paren))
1969    parseParens();
1970  // handle [[likely]] / [[unlikely]]
1971  if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1972    parseSquare();
1973  bool NeedsUnwrappedLine = false;
1974  if (FormatTok->Tok.is(tok::l_brace)) {
1975    CompoundStatementIndenter Indenter(this, Style, Line->Level);
1976    parseBlock(/*MustBeDeclaration=*/false);
1977    if (Style.BraceWrapping.BeforeElse)
1978      addUnwrappedLine();
1979    else
1980      NeedsUnwrappedLine = true;
1981  } else {
1982    addUnwrappedLine();
1983    ++Line->Level;
1984    parseStructuralElement();
1985    --Line->Level;
1986  }
1987  if (FormatTok->Tok.is(tok::kw_else)) {
1988    nextToken();
1989    // handle [[likely]] / [[unlikely]]
1990    if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
1991      parseSquare();
1992    if (FormatTok->Tok.is(tok::l_brace)) {
1993      CompoundStatementIndenter Indenter(this, Style, Line->Level);
1994      parseBlock(/*MustBeDeclaration=*/false);
1995      addUnwrappedLine();
1996    } else if (FormatTok->Tok.is(tok::kw_if)) {
1997      parseIfThenElse();
1998    } else {
1999      addUnwrappedLine();
2000      ++Line->Level;
2001      parseStructuralElement();
2002      if (FormatTok->is(tok::eof))
2003        addUnwrappedLine();
2004      --Line->Level;
2005    }
2006  } else if (NeedsUnwrappedLine) {
2007    addUnwrappedLine();
2008  }
2009}
2010
2011void UnwrappedLineParser::parseTryCatch() {
2012  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2013  nextToken();
2014  bool NeedsUnwrappedLine = false;
2015  if (FormatTok->is(tok::colon)) {
2016    // We are in a function try block, what comes is an initializer list.
2017    nextToken();
2018
2019    // In case identifiers were removed by clang-tidy, what might follow is
2020    // multiple commas in sequence - before the first identifier.
2021    while (FormatTok->is(tok::comma))
2022      nextToken();
2023
2024    while (FormatTok->is(tok::identifier)) {
2025      nextToken();
2026      if (FormatTok->is(tok::l_paren))
2027        parseParens();
2028
2029      // In case identifiers were removed by clang-tidy, what might follow is
2030      // multiple commas in sequence - after the first identifier.
2031      while (FormatTok->is(tok::comma))
2032        nextToken();
2033    }
2034  }
2035  // Parse try with resource.
2036  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2037    parseParens();
2038  }
2039  if (FormatTok->is(tok::l_brace)) {
2040    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2041    parseBlock(/*MustBeDeclaration=*/false);
2042    if (Style.BraceWrapping.BeforeCatch) {
2043      addUnwrappedLine();
2044    } else {
2045      NeedsUnwrappedLine = true;
2046    }
2047  } else if (!FormatTok->is(tok::kw_catch)) {
2048    // The C++ standard requires a compound-statement after a try.
2049    // If there's none, we try to assume there's a structuralElement
2050    // and try to continue.
2051    addUnwrappedLine();
2052    ++Line->Level;
2053    parseStructuralElement();
2054    --Line->Level;
2055  }
2056  while (1) {
2057    if (FormatTok->is(tok::at))
2058      nextToken();
2059    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2060                             tok::kw___finally) ||
2061          ((Style.Language == FormatStyle::LK_Java ||
2062            Style.Language == FormatStyle::LK_JavaScript) &&
2063           FormatTok->is(Keywords.kw_finally)) ||
2064          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2065           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2066      break;
2067    nextToken();
2068    while (FormatTok->isNot(tok::l_brace)) {
2069      if (FormatTok->is(tok::l_paren)) {
2070        parseParens();
2071        continue;
2072      }
2073      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2074        return;
2075      nextToken();
2076    }
2077    NeedsUnwrappedLine = false;
2078    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2079    parseBlock(/*MustBeDeclaration=*/false);
2080    if (Style.BraceWrapping.BeforeCatch)
2081      addUnwrappedLine();
2082    else
2083      NeedsUnwrappedLine = true;
2084  }
2085  if (NeedsUnwrappedLine)
2086    addUnwrappedLine();
2087}
2088
2089void UnwrappedLineParser::parseNamespace() {
2090  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2091         "'namespace' expected");
2092
2093  const FormatToken &InitialToken = *FormatTok;
2094  nextToken();
2095  if (InitialToken.is(TT_NamespaceMacro)) {
2096    parseParens();
2097  } else {
2098    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2099                              tok::l_square)) {
2100      if (FormatTok->is(tok::l_square))
2101        parseSquare();
2102      else
2103        nextToken();
2104    }
2105  }
2106  if (FormatTok->Tok.is(tok::l_brace)) {
2107    if (ShouldBreakBeforeBrace(Style, InitialToken))
2108      addUnwrappedLine();
2109
2110    bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2111                    (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2112                     DeclarationScopeStack.size() > 1);
2113    parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2114    // Munch the semicolon after a namespace. This is more common than one would
2115    // think. Putting the semicolon into its own line is very ugly.
2116    if (FormatTok->Tok.is(tok::semi))
2117      nextToken();
2118    addUnwrappedLine();
2119  }
2120  // FIXME: Add error handling.
2121}
2122
2123void UnwrappedLineParser::parseNew() {
2124  assert(FormatTok->is(tok::kw_new) && "'new' expected");
2125  nextToken();
2126
2127  if (Style.isCSharp()) {
2128    do {
2129      if (FormatTok->is(tok::l_brace))
2130        parseBracedList();
2131
2132      if (FormatTok->isOneOf(tok::semi, tok::comma))
2133        return;
2134
2135      nextToken();
2136    } while (!eof());
2137  }
2138
2139  if (Style.Language != FormatStyle::LK_Java)
2140    return;
2141
2142  // In Java, we can parse everything up to the parens, which aren't optional.
2143  do {
2144    // There should not be a ;, { or } before the new's open paren.
2145    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2146      return;
2147
2148    // Consume the parens.
2149    if (FormatTok->is(tok::l_paren)) {
2150      parseParens();
2151
2152      // If there is a class body of an anonymous class, consume that as child.
2153      if (FormatTok->is(tok::l_brace))
2154        parseChildBlock();
2155      return;
2156    }
2157    nextToken();
2158  } while (!eof());
2159}
2160
2161void UnwrappedLineParser::parseForOrWhileLoop() {
2162  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2163         "'for', 'while' or foreach macro expected");
2164  nextToken();
2165  // JS' for await ( ...
2166  if (Style.Language == FormatStyle::LK_JavaScript &&
2167      FormatTok->is(Keywords.kw_await))
2168    nextToken();
2169  if (FormatTok->Tok.is(tok::l_paren))
2170    parseParens();
2171  if (FormatTok->Tok.is(tok::l_brace)) {
2172    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2173    parseBlock(/*MustBeDeclaration=*/false);
2174    addUnwrappedLine();
2175  } else {
2176    addUnwrappedLine();
2177    ++Line->Level;
2178    parseStructuralElement();
2179    --Line->Level;
2180  }
2181}
2182
2183void UnwrappedLineParser::parseDoWhile() {
2184  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2185  nextToken();
2186  if (FormatTok->Tok.is(tok::l_brace)) {
2187    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2188    parseBlock(/*MustBeDeclaration=*/false);
2189    if (Style.BraceWrapping.BeforeWhile)
2190      addUnwrappedLine();
2191  } else {
2192    addUnwrappedLine();
2193    ++Line->Level;
2194    parseStructuralElement();
2195    --Line->Level;
2196  }
2197
2198  // FIXME: Add error handling.
2199  if (!FormatTok->Tok.is(tok::kw_while)) {
2200    addUnwrappedLine();
2201    return;
2202  }
2203
2204  nextToken();
2205  parseStructuralElement();
2206}
2207
2208void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2209  nextToken();
2210  unsigned OldLineLevel = Line->Level;
2211  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2212    --Line->Level;
2213  if (LeftAlignLabel)
2214    Line->Level = 0;
2215  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2216      FormatTok->Tok.is(tok::l_brace)) {
2217    CompoundStatementIndenter Indenter(this, Line->Level,
2218                                       Style.BraceWrapping.AfterCaseLabel,
2219                                       Style.BraceWrapping.IndentBraces);
2220    parseBlock(/*MustBeDeclaration=*/false);
2221    if (FormatTok->Tok.is(tok::kw_break)) {
2222      if (Style.BraceWrapping.AfterControlStatement ==
2223          FormatStyle::BWACS_Always) {
2224        addUnwrappedLine();
2225        if (!Style.IndentCaseBlocks &&
2226            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2227          Line->Level++;
2228        }
2229      }
2230      parseStructuralElement();
2231    }
2232    addUnwrappedLine();
2233  } else {
2234    if (FormatTok->is(tok::semi))
2235      nextToken();
2236    addUnwrappedLine();
2237  }
2238  Line->Level = OldLineLevel;
2239  if (FormatTok->isNot(tok::l_brace)) {
2240    parseStructuralElement();
2241    addUnwrappedLine();
2242  }
2243}
2244
2245void UnwrappedLineParser::parseCaseLabel() {
2246  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2247  // FIXME: fix handling of complex expressions here.
2248  do {
2249    nextToken();
2250  } while (!eof() && !FormatTok->Tok.is(tok::colon));
2251  parseLabel();
2252}
2253
2254void UnwrappedLineParser::parseSwitch() {
2255  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2256  nextToken();
2257  if (FormatTok->Tok.is(tok::l_paren))
2258    parseParens();
2259  if (FormatTok->Tok.is(tok::l_brace)) {
2260    CompoundStatementIndenter Indenter(this, Style, Line->Level);
2261    parseBlock(/*MustBeDeclaration=*/false);
2262    addUnwrappedLine();
2263  } else {
2264    addUnwrappedLine();
2265    ++Line->Level;
2266    parseStructuralElement();
2267    --Line->Level;
2268  }
2269}
2270
2271void UnwrappedLineParser::parseAccessSpecifier() {
2272  nextToken();
2273  // Understand Qt's slots.
2274  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2275    nextToken();
2276  // Otherwise, we don't know what it is, and we'd better keep the next token.
2277  if (FormatTok->Tok.is(tok::colon))
2278    nextToken();
2279  addUnwrappedLine();
2280}
2281
2282bool UnwrappedLineParser::parseEnum() {
2283  // Won't be 'enum' for NS_ENUMs.
2284  if (FormatTok->Tok.is(tok::kw_enum))
2285    nextToken();
2286
2287  // In TypeScript, "enum" can also be used as property name, e.g. in interface
2288  // declarations. An "enum" keyword followed by a colon would be a syntax
2289  // error and thus assume it is just an identifier.
2290  if (Style.Language == FormatStyle::LK_JavaScript &&
2291      FormatTok->isOneOf(tok::colon, tok::question))
2292    return false;
2293
2294  // In protobuf, "enum" can be used as a field name.
2295  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2296    return false;
2297
2298  // Eat up enum class ...
2299  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2300    nextToken();
2301
2302  while (FormatTok->Tok.getIdentifierInfo() ||
2303         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2304                            tok::greater, tok::comma, tok::question)) {
2305    nextToken();
2306    // We can have macros or attributes in between 'enum' and the enum name.
2307    if (FormatTok->is(tok::l_paren))
2308      parseParens();
2309    if (FormatTok->is(tok::identifier)) {
2310      nextToken();
2311      // If there are two identifiers in a row, this is likely an elaborate
2312      // return type. In Java, this can be "implements", etc.
2313      if (Style.isCpp() && FormatTok->is(tok::identifier))
2314        return false;
2315    }
2316  }
2317
2318  // Just a declaration or something is wrong.
2319  if (FormatTok->isNot(tok::l_brace))
2320    return true;
2321  FormatTok->BlockKind = BK_Block;
2322
2323  if (Style.Language == FormatStyle::LK_Java) {
2324    // Java enums are different.
2325    parseJavaEnumBody();
2326    return true;
2327  }
2328  if (Style.Language == FormatStyle::LK_Proto) {
2329    parseBlock(/*MustBeDeclaration=*/true);
2330    return true;
2331  }
2332
2333  if (!Style.AllowShortEnumsOnASingleLine)
2334    addUnwrappedLine();
2335  // Parse enum body.
2336  nextToken();
2337  if (!Style.AllowShortEnumsOnASingleLine) {
2338    addUnwrappedLine();
2339    Line->Level += 1;
2340  }
2341  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2342                                   /*IsEnum=*/true);
2343  if (!Style.AllowShortEnumsOnASingleLine)
2344    Line->Level -= 1;
2345  if (HasError) {
2346    if (FormatTok->is(tok::semi))
2347      nextToken();
2348    addUnwrappedLine();
2349  }
2350  return true;
2351
2352  // There is no addUnwrappedLine() here so that we fall through to parsing a
2353  // structural element afterwards. Thus, in "enum A {} n, m;",
2354  // "} n, m;" will end up in one unwrapped line.
2355}
2356
2357namespace {
2358// A class used to set and restore the Token position when peeking
2359// ahead in the token source.
2360class ScopedTokenPosition {
2361  unsigned StoredPosition;
2362  FormatTokenSource *Tokens;
2363
2364public:
2365  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2366    assert(Tokens && "Tokens expected to not be null");
2367    StoredPosition = Tokens->getPosition();
2368  }
2369
2370  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2371};
2372} // namespace
2373
2374// Look to see if we have [[ by looking ahead, if
2375// its not then rewind to the original position.
2376bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2377  ScopedTokenPosition AutoPosition(Tokens);
2378  FormatToken *Tok = Tokens->getNextToken();
2379  // We already read the first [ check for the second.
2380  if (Tok && !Tok->is(tok::l_square)) {
2381    return false;
2382  }
2383  // Double check that the attribute is just something
2384  // fairly simple.
2385  while (Tok) {
2386    if (Tok->is(tok::r_square)) {
2387      break;
2388    }
2389    Tok = Tokens->getNextToken();
2390  }
2391  Tok = Tokens->getNextToken();
2392  if (Tok && !Tok->is(tok::r_square)) {
2393    return false;
2394  }
2395  Tok = Tokens->getNextToken();
2396  if (Tok && Tok->is(tok::semi)) {
2397    return false;
2398  }
2399  return true;
2400}
2401
2402void UnwrappedLineParser::parseJavaEnumBody() {
2403  // Determine whether the enum is simple, i.e. does not have a semicolon or
2404  // constants with class bodies. Simple enums can be formatted like braced
2405  // lists, contracted to a single line, etc.
2406  unsigned StoredPosition = Tokens->getPosition();
2407  bool IsSimple = true;
2408  FormatToken *Tok = Tokens->getNextToken();
2409  while (Tok) {
2410    if (Tok->is(tok::r_brace))
2411      break;
2412    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2413      IsSimple = false;
2414      break;
2415    }
2416    // FIXME: This will also mark enums with braces in the arguments to enum
2417    // constants as "not simple". This is probably fine in practice, though.
2418    Tok = Tokens->getNextToken();
2419  }
2420  FormatTok = Tokens->setPosition(StoredPosition);
2421
2422  if (IsSimple) {
2423    nextToken();
2424    parseBracedList();
2425    addUnwrappedLine();
2426    return;
2427  }
2428
2429  // Parse the body of a more complex enum.
2430  // First add a line for everything up to the "{".
2431  nextToken();
2432  addUnwrappedLine();
2433  ++Line->Level;
2434
2435  // Parse the enum constants.
2436  while (FormatTok) {
2437    if (FormatTok->is(tok::l_brace)) {
2438      // Parse the constant's class body.
2439      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2440                 /*MunchSemi=*/false);
2441    } else if (FormatTok->is(tok::l_paren)) {
2442      parseParens();
2443    } else if (FormatTok->is(tok::comma)) {
2444      nextToken();
2445      addUnwrappedLine();
2446    } else if (FormatTok->is(tok::semi)) {
2447      nextToken();
2448      addUnwrappedLine();
2449      break;
2450    } else if (FormatTok->is(tok::r_brace)) {
2451      addUnwrappedLine();
2452      break;
2453    } else {
2454      nextToken();
2455    }
2456  }
2457
2458  // Parse the class body after the enum's ";" if any.
2459  parseLevel(/*HasOpeningBrace=*/true);
2460  nextToken();
2461  --Line->Level;
2462  addUnwrappedLine();
2463}
2464
2465void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2466  const FormatToken &InitialToken = *FormatTok;
2467  nextToken();
2468
2469  // The actual identifier can be a nested name specifier, and in macros
2470  // it is often token-pasted.
2471  // An [[attribute]] can be before the identifier.
2472  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2473                            tok::kw___attribute, tok::kw___declspec,
2474                            tok::kw_alignas, tok::l_square, tok::r_square) ||
2475         ((Style.Language == FormatStyle::LK_Java ||
2476           Style.Language == FormatStyle::LK_JavaScript) &&
2477          FormatTok->isOneOf(tok::period, tok::comma))) {
2478    if (Style.Language == FormatStyle::LK_JavaScript &&
2479        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2480      // JavaScript/TypeScript supports inline object types in
2481      // extends/implements positions:
2482      //     class Foo implements {bar: number} { }
2483      nextToken();
2484      if (FormatTok->is(tok::l_brace)) {
2485        tryToParseBracedList();
2486        continue;
2487      }
2488    }
2489    bool IsNonMacroIdentifier =
2490        FormatTok->is(tok::identifier) &&
2491        FormatTok->TokenText != FormatTok->TokenText.upper();
2492    nextToken();
2493    // We can have macros or attributes in between 'class' and the class name.
2494    if (!IsNonMacroIdentifier) {
2495      if (FormatTok->Tok.is(tok::l_paren)) {
2496        parseParens();
2497      } else if (FormatTok->is(TT_AttributeSquare)) {
2498        parseSquare();
2499        // Consume the closing TT_AttributeSquare.
2500        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2501          nextToken();
2502      }
2503    }
2504  }
2505
2506  // Note that parsing away template declarations here leads to incorrectly
2507  // accepting function declarations as record declarations.
2508  // In general, we cannot solve this problem. Consider:
2509  // class A<int> B() {}
2510  // which can be a function definition or a class definition when B() is a
2511  // macro. If we find enough real-world cases where this is a problem, we
2512  // can parse for the 'template' keyword in the beginning of the statement,
2513  // and thus rule out the record production in case there is no template
2514  // (this would still leave us with an ambiguity between template function
2515  // and class declarations).
2516  if (FormatTok->isOneOf(tok::colon, tok::less)) {
2517    while (!eof()) {
2518      if (FormatTok->is(tok::l_brace)) {
2519        calculateBraceTypes(/*ExpectClassBody=*/true);
2520        if (!tryToParseBracedList())
2521          break;
2522      }
2523      if (FormatTok->Tok.is(tok::semi))
2524        return;
2525      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2526        addUnwrappedLine();
2527        nextToken();
2528        parseCSharpGenericTypeConstraint();
2529        break;
2530      }
2531      nextToken();
2532    }
2533  }
2534  if (FormatTok->Tok.is(tok::l_brace)) {
2535    if (ParseAsExpr) {
2536      parseChildBlock();
2537    } else {
2538      if (ShouldBreakBeforeBrace(Style, InitialToken))
2539        addUnwrappedLine();
2540
2541      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2542                 /*MunchSemi=*/false);
2543    }
2544  }
2545  // There is no addUnwrappedLine() here so that we fall through to parsing a
2546  // structural element afterwards. Thus, in "class A {} n, m;",
2547  // "} n, m;" will end up in one unwrapped line.
2548}
2549
2550void UnwrappedLineParser::parseObjCMethod() {
2551  assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2552         "'(' or identifier expected.");
2553  do {
2554    if (FormatTok->Tok.is(tok::semi)) {
2555      nextToken();
2556      addUnwrappedLine();
2557      return;
2558    } else if (FormatTok->Tok.is(tok::l_brace)) {
2559      if (Style.BraceWrapping.AfterFunction)
2560        addUnwrappedLine();
2561      parseBlock(/*MustBeDeclaration=*/false);
2562      addUnwrappedLine();
2563      return;
2564    } else {
2565      nextToken();
2566    }
2567  } while (!eof());
2568}
2569
2570void UnwrappedLineParser::parseObjCProtocolList() {
2571  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2572  do {
2573    nextToken();
2574    // Early exit in case someone forgot a close angle.
2575    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2576        FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2577      return;
2578  } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2579  nextToken(); // Skip '>'.
2580}
2581
2582void UnwrappedLineParser::parseObjCUntilAtEnd() {
2583  do {
2584    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2585      nextToken();
2586      addUnwrappedLine();
2587      break;
2588    }
2589    if (FormatTok->is(tok::l_brace)) {
2590      parseBlock(/*MustBeDeclaration=*/false);
2591      // In ObjC interfaces, nothing should be following the "}".
2592      addUnwrappedLine();
2593    } else if (FormatTok->is(tok::r_brace)) {
2594      // Ignore stray "}". parseStructuralElement doesn't consume them.
2595      nextToken();
2596      addUnwrappedLine();
2597    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2598      nextToken();
2599      parseObjCMethod();
2600    } else {
2601      parseStructuralElement();
2602    }
2603  } while (!eof());
2604}
2605
2606void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2607  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2608         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2609  nextToken();
2610  nextToken(); // interface name
2611
2612  // @interface can be followed by a lightweight generic
2613  // specialization list, then either a base class or a category.
2614  if (FormatTok->Tok.is(tok::less)) {
2615    // Unlike protocol lists, generic parameterizations support
2616    // nested angles:
2617    //
2618    // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2619    //     NSObject <NSCopying, NSSecureCoding>
2620    //
2621    // so we need to count how many open angles we have left.
2622    unsigned NumOpenAngles = 1;
2623    do {
2624      nextToken();
2625      // Early exit in case someone forgot a close angle.
2626      if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2627          FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2628        break;
2629      if (FormatTok->Tok.is(tok::less))
2630        ++NumOpenAngles;
2631      else if (FormatTok->Tok.is(tok::greater)) {
2632        assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2633        --NumOpenAngles;
2634      }
2635    } while (!eof() && NumOpenAngles != 0);
2636    nextToken(); // Skip '>'.
2637  }
2638  if (FormatTok->Tok.is(tok::colon)) {
2639    nextToken();
2640    nextToken(); // base class name
2641  } else if (FormatTok->Tok.is(tok::l_paren))
2642    // Skip category, if present.
2643    parseParens();
2644
2645  if (FormatTok->Tok.is(tok::less))
2646    parseObjCProtocolList();
2647
2648  if (FormatTok->Tok.is(tok::l_brace)) {
2649    if (Style.BraceWrapping.AfterObjCDeclaration)
2650      addUnwrappedLine();
2651    parseBlock(/*MustBeDeclaration=*/true);
2652  }
2653
2654  // With instance variables, this puts '}' on its own line.  Without instance
2655  // variables, this ends the @interface line.
2656  addUnwrappedLine();
2657
2658  parseObjCUntilAtEnd();
2659}
2660
2661// Returns true for the declaration/definition form of @protocol,
2662// false for the expression form.
2663bool UnwrappedLineParser::parseObjCProtocol() {
2664  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2665  nextToken();
2666
2667  if (FormatTok->is(tok::l_paren))
2668    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2669    return false;
2670
2671  // The definition/declaration form,
2672  // @protocol Foo
2673  // - (int)someMethod;
2674  // @end
2675
2676  nextToken(); // protocol name
2677
2678  if (FormatTok->Tok.is(tok::less))
2679    parseObjCProtocolList();
2680
2681  // Check for protocol declaration.
2682  if (FormatTok->Tok.is(tok::semi)) {
2683    nextToken();
2684    addUnwrappedLine();
2685    return true;
2686  }
2687
2688  addUnwrappedLine();
2689  parseObjCUntilAtEnd();
2690  return true;
2691}
2692
2693void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2694  bool IsImport = FormatTok->is(Keywords.kw_import);
2695  assert(IsImport || FormatTok->is(tok::kw_export));
2696  nextToken();
2697
2698  // Consume the "default" in "export default class/function".
2699  if (FormatTok->is(tok::kw_default))
2700    nextToken();
2701
2702  // Consume "async function", "function" and "default function", so that these
2703  // get parsed as free-standing JS functions, i.e. do not require a trailing
2704  // semicolon.
2705  if (FormatTok->is(Keywords.kw_async))
2706    nextToken();
2707  if (FormatTok->is(Keywords.kw_function)) {
2708    nextToken();
2709    return;
2710  }
2711
2712  // For imports, `export *`, `export {...}`, consume the rest of the line up
2713  // to the terminating `;`. For everything else, just return and continue
2714  // parsing the structural element, i.e. the declaration or expression for
2715  // `export default`.
2716  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2717      !FormatTok->isStringLiteral())
2718    return;
2719
2720  while (!eof()) {
2721    if (FormatTok->is(tok::semi))
2722      return;
2723    if (Line->Tokens.empty()) {
2724      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2725      // import statement should terminate.
2726      return;
2727    }
2728    if (FormatTok->is(tok::l_brace)) {
2729      FormatTok->BlockKind = BK_Block;
2730      nextToken();
2731      parseBracedList();
2732    } else {
2733      nextToken();
2734    }
2735  }
2736}
2737
2738void UnwrappedLineParser::parseStatementMacro() {
2739  nextToken();
2740  if (FormatTok->is(tok::l_paren))
2741    parseParens();
2742  if (FormatTok->is(tok::semi))
2743    nextToken();
2744  addUnwrappedLine();
2745}
2746
2747LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2748                                                 StringRef Prefix = "") {
2749  llvm::dbgs() << Prefix << "Line(" << Line.Level
2750               << ", FSC=" << Line.FirstStartColumn << ")"
2751               << (Line.InPPDirective ? " MACRO" : "") << ": ";
2752  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2753                                                    E = Line.Tokens.end();
2754       I != E; ++I) {
2755    llvm::dbgs() << I->Tok->Tok.getName() << "["
2756                 << "T=" << I->Tok->getType()
2757                 << ", OC=" << I->Tok->OriginalColumn << "] ";
2758  }
2759  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2760                                                    E = Line.Tokens.end();
2761       I != E; ++I) {
2762    const UnwrappedLineNode &Node = *I;
2763    for (SmallVectorImpl<UnwrappedLine>::const_iterator
2764             I = Node.Children.begin(),
2765             E = Node.Children.end();
2766         I != E; ++I) {
2767      printDebugInfo(*I, "\nChild: ");
2768    }
2769  }
2770  llvm::dbgs() << "\n";
2771}
2772
2773void UnwrappedLineParser::addUnwrappedLine() {
2774  if (Line->Tokens.empty())
2775    return;
2776  LLVM_DEBUG({
2777    if (CurrentLines == &Lines)
2778      printDebugInfo(*Line);
2779  });
2780  CurrentLines->push_back(std::move(*Line));
2781  Line->Tokens.clear();
2782  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2783  Line->FirstStartColumn = 0;
2784  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2785    CurrentLines->append(
2786        std::make_move_iterator(PreprocessorDirectives.begin()),
2787        std::make_move_iterator(PreprocessorDirectives.end()));
2788    PreprocessorDirectives.clear();
2789  }
2790  // Disconnect the current token from the last token on the previous line.
2791  FormatTok->Previous = nullptr;
2792}
2793
2794bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2795
2796bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2797  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2798         FormatTok.NewlinesBefore > 0;
2799}
2800
2801// Checks if \p FormatTok is a line comment that continues the line comment
2802// section on \p Line.
2803static bool
2804continuesLineCommentSection(const FormatToken &FormatTok,
2805                            const UnwrappedLine &Line,
2806                            const llvm::Regex &CommentPragmasRegex) {
2807  if (Line.Tokens.empty())
2808    return false;
2809
2810  StringRef IndentContent = FormatTok.TokenText;
2811  if (FormatTok.TokenText.startswith("//") ||
2812      FormatTok.TokenText.startswith("/*"))
2813    IndentContent = FormatTok.TokenText.substr(2);
2814  if (CommentPragmasRegex.match(IndentContent))
2815    return false;
2816
2817  // If Line starts with a line comment, then FormatTok continues the comment
2818  // section if its original column is greater or equal to the original start
2819  // column of the line.
2820  //
2821  // Define the min column token of a line as follows: if a line ends in '{' or
2822  // contains a '{' followed by a line comment, then the min column token is
2823  // that '{'. Otherwise, the min column token of the line is the first token of
2824  // the line.
2825  //
2826  // If Line starts with a token other than a line comment, then FormatTok
2827  // continues the comment section if its original column is greater than the
2828  // original start column of the min column token of the line.
2829  //
2830  // For example, the second line comment continues the first in these cases:
2831  //
2832  // // first line
2833  // // second line
2834  //
2835  // and:
2836  //
2837  // // first line
2838  //  // second line
2839  //
2840  // and:
2841  //
2842  // int i; // first line
2843  //  // second line
2844  //
2845  // and:
2846  //
2847  // do { // first line
2848  //      // second line
2849  //   int i;
2850  // } while (true);
2851  //
2852  // and:
2853  //
2854  // enum {
2855  //   a, // first line
2856  //    // second line
2857  //   b
2858  // };
2859  //
2860  // The second line comment doesn't continue the first in these cases:
2861  //
2862  //   // first line
2863  //  // second line
2864  //
2865  // and:
2866  //
2867  // int i; // first line
2868  // // second line
2869  //
2870  // and:
2871  //
2872  // do { // first line
2873  //   // second line
2874  //   int i;
2875  // } while (true);
2876  //
2877  // and:
2878  //
2879  // enum {
2880  //   a, // first line
2881  //   // second line
2882  // };
2883  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2884
2885  // Scan for '{//'. If found, use the column of '{' as a min column for line
2886  // comment section continuation.
2887  const FormatToken *PreviousToken = nullptr;
2888  for (const UnwrappedLineNode &Node : Line.Tokens) {
2889    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2890        isLineComment(*Node.Tok)) {
2891      MinColumnToken = PreviousToken;
2892      break;
2893    }
2894    PreviousToken = Node.Tok;
2895
2896    // Grab the last newline preceding a token in this unwrapped line.
2897    if (Node.Tok->NewlinesBefore > 0) {
2898      MinColumnToken = Node.Tok;
2899    }
2900  }
2901  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2902    MinColumnToken = PreviousToken;
2903  }
2904
2905  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2906                              MinColumnToken);
2907}
2908
2909void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2910  bool JustComments = Line->Tokens.empty();
2911  for (SmallVectorImpl<FormatToken *>::const_iterator
2912           I = CommentsBeforeNextToken.begin(),
2913           E = CommentsBeforeNextToken.end();
2914       I != E; ++I) {
2915    // Line comments that belong to the same line comment section are put on the
2916    // same line since later we might want to reflow content between them.
2917    // Additional fine-grained breaking of line comment sections is controlled
2918    // by the class BreakableLineCommentSection in case it is desirable to keep
2919    // several line comment sections in the same unwrapped line.
2920    //
2921    // FIXME: Consider putting separate line comment sections as children to the
2922    // unwrapped line instead.
2923    (*I)->ContinuesLineCommentSection =
2924        continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2925    if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2926      addUnwrappedLine();
2927    pushToken(*I);
2928  }
2929  if (NewlineBeforeNext && JustComments)
2930    addUnwrappedLine();
2931  CommentsBeforeNextToken.clear();
2932}
2933
2934void UnwrappedLineParser::nextToken(int LevelDifference) {
2935  if (eof())
2936    return;
2937  flushComments(isOnNewLine(*FormatTok));
2938  pushToken(FormatTok);
2939  FormatToken *Previous = FormatTok;
2940  if (Style.Language != FormatStyle::LK_JavaScript)
2941    readToken(LevelDifference);
2942  else
2943    readTokenWithJavaScriptASI();
2944  FormatTok->Previous = Previous;
2945}
2946
2947void UnwrappedLineParser::distributeComments(
2948    const SmallVectorImpl<FormatToken *> &Comments,
2949    const FormatToken *NextTok) {
2950  // Whether or not a line comment token continues a line is controlled by
2951  // the method continuesLineCommentSection, with the following caveat:
2952  //
2953  // Define a trail of Comments to be a nonempty proper postfix of Comments such
2954  // that each comment line from the trail is aligned with the next token, if
2955  // the next token exists. If a trail exists, the beginning of the maximal
2956  // trail is marked as a start of a new comment section.
2957  //
2958  // For example in this code:
2959  //
2960  // int a; // line about a
2961  //   // line 1 about b
2962  //   // line 2 about b
2963  //   int b;
2964  //
2965  // the two lines about b form a maximal trail, so there are two sections, the
2966  // first one consisting of the single comment "// line about a" and the
2967  // second one consisting of the next two comments.
2968  if (Comments.empty())
2969    return;
2970  bool ShouldPushCommentsInCurrentLine = true;
2971  bool HasTrailAlignedWithNextToken = false;
2972  unsigned StartOfTrailAlignedWithNextToken = 0;
2973  if (NextTok) {
2974    // We are skipping the first element intentionally.
2975    for (unsigned i = Comments.size() - 1; i > 0; --i) {
2976      if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2977        HasTrailAlignedWithNextToken = true;
2978        StartOfTrailAlignedWithNextToken = i;
2979      }
2980    }
2981  }
2982  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2983    FormatToken *FormatTok = Comments[i];
2984    if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2985      FormatTok->ContinuesLineCommentSection = false;
2986    } else {
2987      FormatTok->ContinuesLineCommentSection =
2988          continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2989    }
2990    if (!FormatTok->ContinuesLineCommentSection &&
2991        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2992      ShouldPushCommentsInCurrentLine = false;
2993    }
2994    if (ShouldPushCommentsInCurrentLine) {
2995      pushToken(FormatTok);
2996    } else {
2997      CommentsBeforeNextToken.push_back(FormatTok);
2998    }
2999  }
3000}
3001
3002void UnwrappedLineParser::readToken(int LevelDifference) {
3003  SmallVector<FormatToken *, 1> Comments;
3004  do {
3005    FormatTok = Tokens->getNextToken();
3006    assert(FormatTok);
3007    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3008           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3009      distributeComments(Comments, FormatTok);
3010      Comments.clear();
3011      // If there is an unfinished unwrapped line, we flush the preprocessor
3012      // directives only after that unwrapped line was finished later.
3013      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3014      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3015      assert((LevelDifference >= 0 ||
3016              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3017             "LevelDifference makes Line->Level negative");
3018      Line->Level += LevelDifference;
3019      // Comments stored before the preprocessor directive need to be output
3020      // before the preprocessor directive, at the same level as the
3021      // preprocessor directive, as we consider them to apply to the directive.
3022      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3023          PPBranchLevel > 0)
3024        Line->Level += PPBranchLevel;
3025      flushComments(isOnNewLine(*FormatTok));
3026      parsePPDirective();
3027    }
3028    while (FormatTok->getType() == TT_ConflictStart ||
3029           FormatTok->getType() == TT_ConflictEnd ||
3030           FormatTok->getType() == TT_ConflictAlternative) {
3031      if (FormatTok->getType() == TT_ConflictStart) {
3032        conditionalCompilationStart(/*Unreachable=*/false);
3033      } else if (FormatTok->getType() == TT_ConflictAlternative) {
3034        conditionalCompilationAlternative();
3035      } else if (FormatTok->getType() == TT_ConflictEnd) {
3036        conditionalCompilationEnd();
3037      }
3038      FormatTok = Tokens->getNextToken();
3039      FormatTok->MustBreakBefore = true;
3040    }
3041
3042    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3043        !Line->InPPDirective) {
3044      continue;
3045    }
3046
3047    if (!FormatTok->Tok.is(tok::comment)) {
3048      distributeComments(Comments, FormatTok);
3049      Comments.clear();
3050      return;
3051    }
3052
3053    Comments.push_back(FormatTok);
3054  } while (!eof());
3055
3056  distributeComments(Comments, nullptr);
3057  Comments.clear();
3058}
3059
3060void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3061  Line->Tokens.push_back(UnwrappedLineNode(Tok));
3062  if (MustBreakBeforeNextToken) {
3063    Line->Tokens.back().Tok->MustBreakBefore = true;
3064    MustBreakBeforeNextToken = false;
3065  }
3066}
3067
3068} // end namespace format
3069} // end namespace clang
3070