1//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file contains the implementation of the UnwrappedLineParser,
12/// which turns a stream of tokens into UnwrappedLines.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-parser"
17
18#include "UnwrappedLineParser.h"
19#include "llvm/Support/Debug.h"
20
21namespace clang {
22namespace format {
23
24class FormatTokenSource {
25public:
26  virtual ~FormatTokenSource() {}
27  virtual FormatToken *getNextToken() = 0;
28
29  virtual unsigned getPosition() = 0;
30  virtual FormatToken *setPosition(unsigned Position) = 0;
31};
32
33namespace {
34
35class ScopedDeclarationState {
36public:
37  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38                         bool MustBeDeclaration)
39      : Line(Line), Stack(Stack) {
40    Line.MustBeDeclaration = MustBeDeclaration;
41    Stack.push_back(MustBeDeclaration);
42  }
43  ~ScopedDeclarationState() {
44    Stack.pop_back();
45    if (!Stack.empty())
46      Line.MustBeDeclaration = Stack.back();
47    else
48      Line.MustBeDeclaration = true;
49  }
50
51private:
52  UnwrappedLine &Line;
53  std::vector<bool> &Stack;
54};
55
56class ScopedMacroState : public FormatTokenSource {
57public:
58  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59                   FormatToken *&ResetToken, bool &StructuralError)
60      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62        StructuralError(StructuralError),
63        PreviousStructuralError(StructuralError), Token(NULL) {
64    TokenSource = this;
65    Line.Level = 0;
66    Line.InPPDirective = true;
67  }
68
69  ~ScopedMacroState() {
70    TokenSource = PreviousTokenSource;
71    ResetToken = Token;
72    Line.InPPDirective = false;
73    Line.Level = PreviousLineLevel;
74    StructuralError = PreviousStructuralError;
75  }
76
77  virtual FormatToken *getNextToken() {
78    // The \c UnwrappedLineParser guards against this by never calling
79    // \c getNextToken() after it has encountered the first eof token.
80    assert(!eof());
81    Token = PreviousTokenSource->getNextToken();
82    if (eof())
83      return getFakeEOF();
84    return Token;
85  }
86
87  virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88
89  virtual FormatToken *setPosition(unsigned Position) {
90    Token = PreviousTokenSource->setPosition(Position);
91    return Token;
92  }
93
94private:
95  bool eof() { return Token && Token->HasUnescapedNewline; }
96
97  FormatToken *getFakeEOF() {
98    static bool EOFInitialized = false;
99    static FormatToken FormatTok;
100    if (!EOFInitialized) {
101      FormatTok.Tok.startToken();
102      FormatTok.Tok.setKind(tok::eof);
103      EOFInitialized = true;
104    }
105    return &FormatTok;
106  }
107
108  UnwrappedLine &Line;
109  FormatTokenSource *&TokenSource;
110  FormatToken *&ResetToken;
111  unsigned PreviousLineLevel;
112  FormatTokenSource *PreviousTokenSource;
113  bool &StructuralError;
114  bool PreviousStructuralError;
115
116  FormatToken *Token;
117};
118
119} // end anonymous namespace
120
121class ScopedLineState {
122public:
123  ScopedLineState(UnwrappedLineParser &Parser,
124                  bool SwitchToPreprocessorLines = false)
125      : Parser(Parser) {
126    OriginalLines = Parser.CurrentLines;
127    if (SwitchToPreprocessorLines)
128      Parser.CurrentLines = &Parser.PreprocessorDirectives;
129    else if (!Parser.Line->Tokens.empty())
130      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
131    PreBlockLine = Parser.Line.take();
132    Parser.Line.reset(new UnwrappedLine());
133    Parser.Line->Level = PreBlockLine->Level;
134    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
135  }
136
137  ~ScopedLineState() {
138    if (!Parser.Line->Tokens.empty()) {
139      Parser.addUnwrappedLine();
140    }
141    assert(Parser.Line->Tokens.empty());
142    Parser.Line.reset(PreBlockLine);
143    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
144      Parser.MustBreakBeforeNextToken = true;
145    Parser.CurrentLines = OriginalLines;
146  }
147
148private:
149  UnwrappedLineParser &Parser;
150
151  UnwrappedLine *PreBlockLine;
152  SmallVectorImpl<UnwrappedLine> *OriginalLines;
153};
154
155namespace {
156
157class IndexedTokenSource : public FormatTokenSource {
158public:
159  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
160      : Tokens(Tokens), Position(-1) {}
161
162  virtual FormatToken *getNextToken() {
163    ++Position;
164    return Tokens[Position];
165  }
166
167  virtual unsigned getPosition() {
168    assert(Position >= 0);
169    return Position;
170  }
171
172  virtual FormatToken *setPosition(unsigned P) {
173    Position = P;
174    return Tokens[Position];
175  }
176
177  void reset() { Position = -1; }
178
179private:
180  ArrayRef<FormatToken *> Tokens;
181  int Position;
182};
183
184} // end anonymous namespace
185
186UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
187                                         ArrayRef<FormatToken *> Tokens,
188                                         UnwrappedLineConsumer &Callback)
189    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
190      CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
191      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
192
193void UnwrappedLineParser::reset() {
194  PPBranchLevel = -1;
195  Line.reset(new UnwrappedLine);
196  CommentsBeforeNextToken.clear();
197  FormatTok = NULL;
198  MustBreakBeforeNextToken = false;
199  PreprocessorDirectives.clear();
200  CurrentLines = &Lines;
201  DeclarationScopeStack.clear();
202  StructuralError = false;
203  PPStack.clear();
204}
205
206bool UnwrappedLineParser::parse() {
207  IndexedTokenSource TokenSource(AllTokens);
208  do {
209    DEBUG(llvm::dbgs() << "----\n");
210    reset();
211    Tokens = &TokenSource;
212    TokenSource.reset();
213
214    readToken();
215    parseFile();
216    // Create line with eof token.
217    pushToken(FormatTok);
218    addUnwrappedLine();
219
220    for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
221                                                  E = Lines.end();
222         I != E; ++I) {
223      Callback.consumeUnwrappedLine(*I);
224    }
225    Callback.finishRun();
226    Lines.clear();
227    while (!PPLevelBranchIndex.empty() &&
228           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
229      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
230      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
231    }
232    if (!PPLevelBranchIndex.empty()) {
233      ++PPLevelBranchIndex.back();
234      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
235      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
236    }
237  } while (!PPLevelBranchIndex.empty());
238
239  return StructuralError;
240}
241
242void UnwrappedLineParser::parseFile() {
243  ScopedDeclarationState DeclarationState(
244      *Line, DeclarationScopeStack,
245      /*MustBeDeclaration=*/ !Line->InPPDirective);
246  parseLevel(/*HasOpeningBrace=*/false);
247  // Make sure to format the remaining tokens.
248  flushComments(true);
249  addUnwrappedLine();
250}
251
252void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
253  bool SwitchLabelEncountered = false;
254  do {
255    switch (FormatTok->Tok.getKind()) {
256    case tok::comment:
257      nextToken();
258      addUnwrappedLine();
259      break;
260    case tok::l_brace:
261      // FIXME: Add parameter whether this can happen - if this happens, we must
262      // be in a non-declaration context.
263      parseBlock(/*MustBeDeclaration=*/false);
264      addUnwrappedLine();
265      break;
266    case tok::r_brace:
267      if (HasOpeningBrace)
268        return;
269      StructuralError = true;
270      nextToken();
271      addUnwrappedLine();
272      break;
273    case tok::kw_default:
274    case tok::kw_case:
275      if (!SwitchLabelEncountered &&
276          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
277        ++Line->Level;
278      SwitchLabelEncountered = true;
279      parseStructuralElement();
280      break;
281    default:
282      parseStructuralElement();
283      break;
284    }
285  } while (!eof());
286}
287
288void UnwrappedLineParser::calculateBraceTypes() {
289  // We'll parse forward through the tokens until we hit
290  // a closing brace or eof - note that getNextToken() will
291  // parse macros, so this will magically work inside macro
292  // definitions, too.
293  unsigned StoredPosition = Tokens->getPosition();
294  unsigned Position = StoredPosition;
295  FormatToken *Tok = FormatTok;
296  // Keep a stack of positions of lbrace tokens. We will
297  // update information about whether an lbrace starts a
298  // braced init list or a different block during the loop.
299  SmallVector<FormatToken *, 8> LBraceStack;
300  assert(Tok->Tok.is(tok::l_brace));
301  do {
302    // Get next none-comment token.
303    FormatToken *NextTok;
304    unsigned ReadTokens = 0;
305    do {
306      NextTok = Tokens->getNextToken();
307      ++ReadTokens;
308    } while (NextTok->is(tok::comment));
309
310    switch (Tok->Tok.getKind()) {
311    case tok::l_brace:
312      LBraceStack.push_back(Tok);
313      break;
314    case tok::r_brace:
315      if (!LBraceStack.empty()) {
316        if (LBraceStack.back()->BlockKind == BK_Unknown) {
317          // If there is a comma, semicolon or right paren after the closing
318          // brace, we assume this is a braced initializer list.  Note that
319          // regardless how we mark inner braces here, we will overwrite the
320          // BlockKind later if we parse a braced list (where all blocks inside
321          // are by default braced lists), or when we explicitly detect blocks
322          // (for example while parsing lambdas).
323          //
324          // We exclude + and - as they can be ObjC visibility modifiers.
325          if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
326                               tok::r_square, tok::l_brace, tok::colon) ||
327              (NextTok->isBinaryOperator() &&
328               !NextTok->isOneOf(tok::plus, tok::minus))) {
329            Tok->BlockKind = BK_BracedInit;
330            LBraceStack.back()->BlockKind = BK_BracedInit;
331          } else {
332            Tok->BlockKind = BK_Block;
333            LBraceStack.back()->BlockKind = BK_Block;
334          }
335        }
336        LBraceStack.pop_back();
337      }
338      break;
339    case tok::semi:
340    case tok::kw_if:
341    case tok::kw_while:
342    case tok::kw_for:
343    case tok::kw_switch:
344    case tok::kw_try:
345      if (!LBraceStack.empty())
346        LBraceStack.back()->BlockKind = BK_Block;
347      break;
348    default:
349      break;
350    }
351    Tok = NextTok;
352    Position += ReadTokens;
353  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
354  // Assume other blocks for all unclosed opening braces.
355  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
356    if (LBraceStack[i]->BlockKind == BK_Unknown)
357      LBraceStack[i]->BlockKind = BK_Block;
358  }
359
360  FormatTok = Tokens->setPosition(StoredPosition);
361}
362
363void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
364                                     bool MunchSemi) {
365  assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
366  unsigned InitialLevel = Line->Level;
367  nextToken();
368
369  addUnwrappedLine();
370
371  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
372                                          MustBeDeclaration);
373  if (AddLevel)
374    ++Line->Level;
375  parseLevel(/*HasOpeningBrace=*/true);
376
377  if (!FormatTok->Tok.is(tok::r_brace)) {
378    Line->Level = InitialLevel;
379    StructuralError = true;
380    return;
381  }
382
383  nextToken(); // Munch the closing brace.
384  if (MunchSemi && FormatTok->Tok.is(tok::semi))
385    nextToken();
386  Line->Level = InitialLevel;
387}
388
389void UnwrappedLineParser::parseChildBlock() {
390  FormatTok->BlockKind = BK_Block;
391  nextToken();
392  {
393    ScopedLineState LineState(*this);
394    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
395                                            /*MustBeDeclaration=*/false);
396    Line->Level += 1;
397    parseLevel(/*HasOpeningBrace=*/true);
398    Line->Level -= 1;
399  }
400  nextToken();
401}
402
403void UnwrappedLineParser::parsePPDirective() {
404  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
405  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
406  nextToken();
407
408  if (FormatTok->Tok.getIdentifierInfo() == NULL) {
409    parsePPUnknown();
410    return;
411  }
412
413  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
414  case tok::pp_define:
415    parsePPDefine();
416    return;
417  case tok::pp_if:
418    parsePPIf(/*IfDef=*/false);
419    break;
420  case tok::pp_ifdef:
421  case tok::pp_ifndef:
422    parsePPIf(/*IfDef=*/true);
423    break;
424  case tok::pp_else:
425    parsePPElse();
426    break;
427  case tok::pp_elif:
428    parsePPElIf();
429    break;
430  case tok::pp_endif:
431    parsePPEndIf();
432    break;
433  default:
434    parsePPUnknown();
435    break;
436  }
437}
438
439void UnwrappedLineParser::pushPPConditional() {
440  if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
441    PPStack.push_back(PP_Unreachable);
442  else
443    PPStack.push_back(PP_Conditional);
444}
445
446void UnwrappedLineParser::parsePPIf(bool IfDef) {
447  ++PPBranchLevel;
448  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
449  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
450    PPLevelBranchIndex.push_back(0);
451    PPLevelBranchCount.push_back(0);
452  }
453  PPChainBranchIndex.push(0);
454  nextToken();
455  bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
456                         StringRef(FormatTok->Tok.getLiteralData(),
457                                   FormatTok->Tok.getLength()) == "0") ||
458                        FormatTok->Tok.is(tok::kw_false);
459  if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) {
460    PPStack.push_back(PP_Unreachable);
461  } else {
462    pushPPConditional();
463  }
464  parsePPUnknown();
465}
466
467void UnwrappedLineParser::parsePPElse() {
468  if (!PPStack.empty())
469    PPStack.pop_back();
470  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
471  if (!PPChainBranchIndex.empty())
472    ++PPChainBranchIndex.top();
473  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
474      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) {
475    PPStack.push_back(PP_Unreachable);
476  } else {
477    pushPPConditional();
478  }
479  parsePPUnknown();
480}
481
482void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
483
484void UnwrappedLineParser::parsePPEndIf() {
485  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
486  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
487    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
488      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
489    }
490  }
491  --PPBranchLevel;
492  if (!PPChainBranchIndex.empty())
493    PPChainBranchIndex.pop();
494  if (!PPStack.empty())
495    PPStack.pop_back();
496  parsePPUnknown();
497}
498
499void UnwrappedLineParser::parsePPDefine() {
500  nextToken();
501
502  if (FormatTok->Tok.getKind() != tok::identifier) {
503    parsePPUnknown();
504    return;
505  }
506  nextToken();
507  if (FormatTok->Tok.getKind() == tok::l_paren &&
508      FormatTok->WhitespaceRange.getBegin() ==
509          FormatTok->WhitespaceRange.getEnd()) {
510    parseParens();
511  }
512  addUnwrappedLine();
513  Line->Level = 1;
514
515  // Errors during a preprocessor directive can only affect the layout of the
516  // preprocessor directive, and thus we ignore them. An alternative approach
517  // would be to use the same approach we use on the file level (no
518  // re-indentation if there was a structural error) within the macro
519  // definition.
520  parseFile();
521}
522
523void UnwrappedLineParser::parsePPUnknown() {
524  do {
525    nextToken();
526  } while (!eof());
527  addUnwrappedLine();
528}
529
530// Here we blacklist certain tokens that are not usually the first token in an
531// unwrapped line. This is used in attempt to distinguish macro calls without
532// trailing semicolons from other constructs split to several lines.
533bool tokenCanStartNewLine(clang::Token Tok) {
534  // Semicolon can be a null-statement, l_square can be a start of a macro or
535  // a C++11 attribute, but this doesn't seem to be common.
536  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
537         Tok.isNot(tok::l_square) &&
538         // Tokens that can only be used as binary operators and a part of
539         // overloaded operator names.
540         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
541         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
542         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
543         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
544         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
545         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
546         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
547         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
548         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
549         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
550         Tok.isNot(tok::lesslessequal) &&
551         // Colon is used in labels, base class lists, initializer lists,
552         // range-based for loops, ternary operator, but should never be the
553         // first token in an unwrapped line.
554         Tok.isNot(tok::colon);
555}
556
557void UnwrappedLineParser::parseStructuralElement() {
558  assert(!FormatTok->Tok.is(tok::l_brace));
559  switch (FormatTok->Tok.getKind()) {
560  case tok::at:
561    nextToken();
562    if (FormatTok->Tok.is(tok::l_brace)) {
563      parseBracedList();
564      break;
565    }
566    switch (FormatTok->Tok.getObjCKeywordID()) {
567    case tok::objc_public:
568    case tok::objc_protected:
569    case tok::objc_package:
570    case tok::objc_private:
571      return parseAccessSpecifier();
572    case tok::objc_interface:
573    case tok::objc_implementation:
574      return parseObjCInterfaceOrImplementation();
575    case tok::objc_protocol:
576      return parseObjCProtocol();
577    case tok::objc_end:
578      return; // Handled by the caller.
579    case tok::objc_optional:
580    case tok::objc_required:
581      nextToken();
582      addUnwrappedLine();
583      return;
584    default:
585      break;
586    }
587    break;
588  case tok::kw_namespace:
589    parseNamespace();
590    return;
591  case tok::kw_inline:
592    nextToken();
593    if (FormatTok->Tok.is(tok::kw_namespace)) {
594      parseNamespace();
595      return;
596    }
597    break;
598  case tok::kw_public:
599  case tok::kw_protected:
600  case tok::kw_private:
601    parseAccessSpecifier();
602    return;
603  case tok::kw_if:
604    parseIfThenElse();
605    return;
606  case tok::kw_for:
607  case tok::kw_while:
608    parseForOrWhileLoop();
609    return;
610  case tok::kw_do:
611    parseDoWhile();
612    return;
613  case tok::kw_switch:
614    parseSwitch();
615    return;
616  case tok::kw_default:
617    nextToken();
618    parseLabel();
619    return;
620  case tok::kw_case:
621    parseCaseLabel();
622    return;
623  case tok::kw_return:
624    parseReturn();
625    return;
626  case tok::kw_extern:
627    nextToken();
628    if (FormatTok->Tok.is(tok::string_literal)) {
629      nextToken();
630      if (FormatTok->Tok.is(tok::l_brace)) {
631        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
632        addUnwrappedLine();
633        return;
634      }
635    }
636    // In all other cases, parse the declaration.
637    break;
638  default:
639    break;
640  }
641  do {
642    switch (FormatTok->Tok.getKind()) {
643    case tok::at:
644      nextToken();
645      if (FormatTok->Tok.is(tok::l_brace))
646        parseBracedList();
647      break;
648    case tok::kw_enum:
649      parseEnum();
650      break;
651    case tok::kw_struct:
652    case tok::kw_union:
653    case tok::kw_class:
654      parseRecord();
655      // A record declaration or definition is always the start of a structural
656      // element.
657      break;
658    case tok::semi:
659      nextToken();
660      addUnwrappedLine();
661      return;
662    case tok::r_brace:
663      addUnwrappedLine();
664      return;
665    case tok::l_paren:
666      parseParens();
667      break;
668    case tok::caret:
669      nextToken();
670      if (FormatTok->is(tok::l_brace)) {
671        parseChildBlock();
672      }
673      break;
674    case tok::l_brace:
675      if (!tryToParseBracedList()) {
676        // A block outside of parentheses must be the last part of a
677        // structural element.
678        // FIXME: Figure out cases where this is not true, and add projections
679        // for them (the one we know is missing are lambdas).
680        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
681            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
682            Style.BreakBeforeBraces == FormatStyle::BS_Allman)
683          addUnwrappedLine();
684        parseBlock(/*MustBeDeclaration=*/false);
685        addUnwrappedLine();
686        return;
687      }
688      // Otherwise this was a braced init list, and the structural
689      // element continues.
690      break;
691    case tok::identifier: {
692      StringRef Text = FormatTok->TokenText;
693      nextToken();
694      if (Line->Tokens.size() == 1) {
695        if (FormatTok->Tok.is(tok::colon)) {
696          parseLabel();
697          return;
698        }
699        // Recognize function-like macro usages without trailing semicolon.
700        if (FormatTok->Tok.is(tok::l_paren)) {
701          parseParens();
702          if (FormatTok->HasUnescapedNewline &&
703              tokenCanStartNewLine(FormatTok->Tok)) {
704            addUnwrappedLine();
705            return;
706          }
707        } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
708                   Text == Text.upper()) {
709          // Recognize free-standing macros like Q_OBJECT.
710          addUnwrappedLine();
711          return;
712        }
713      }
714      break;
715    }
716    case tok::equal:
717      nextToken();
718      if (FormatTok->Tok.is(tok::l_brace)) {
719        parseBracedList();
720      }
721      break;
722    case tok::l_square:
723      tryToParseLambda();
724      break;
725    default:
726      nextToken();
727      break;
728    }
729  } while (!eof());
730}
731
732void UnwrappedLineParser::tryToParseLambda() {
733  // FIXME: This is a dirty way to access the previous token. Find a better
734  // solution.
735  if (!Line->Tokens.empty() &&
736      Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) {
737    nextToken();
738    return;
739  }
740  assert(FormatTok->is(tok::l_square));
741  FormatToken &LSquare = *FormatTok;
742  if (!tryToParseLambdaIntroducer())
743    return;
744
745  while (FormatTok->isNot(tok::l_brace)) {
746    switch (FormatTok->Tok.getKind()) {
747    case tok::l_brace:
748      break;
749    case tok::l_paren:
750      parseParens();
751      break;
752    case tok::identifier:
753    case tok::kw_mutable:
754      nextToken();
755      break;
756    default:
757      return;
758    }
759  }
760  LSquare.Type = TT_LambdaLSquare;
761  parseChildBlock();
762}
763
764bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
765  nextToken();
766  if (FormatTok->is(tok::equal)) {
767    nextToken();
768    if (FormatTok->is(tok::r_square)) {
769      nextToken();
770      return true;
771    }
772    if (FormatTok->isNot(tok::comma))
773      return false;
774    nextToken();
775  } else if (FormatTok->is(tok::amp)) {
776    nextToken();
777    if (FormatTok->is(tok::r_square)) {
778      nextToken();
779      return true;
780    }
781    if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
782      return false;
783    }
784    if (FormatTok->is(tok::comma))
785      nextToken();
786  } else if (FormatTok->is(tok::r_square)) {
787    nextToken();
788    return true;
789  }
790  do {
791    if (FormatTok->is(tok::amp))
792      nextToken();
793    if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
794      return false;
795    nextToken();
796    if (FormatTok->is(tok::comma)) {
797      nextToken();
798    } else if (FormatTok->is(tok::r_square)) {
799      nextToken();
800      return true;
801    } else {
802      return false;
803    }
804  } while (!eof());
805  return false;
806}
807
808bool UnwrappedLineParser::tryToParseBracedList() {
809  if (FormatTok->BlockKind == BK_Unknown)
810    calculateBraceTypes();
811  assert(FormatTok->BlockKind != BK_Unknown);
812  if (FormatTok->BlockKind == BK_Block)
813    return false;
814  parseBracedList();
815  return true;
816}
817
818bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
819  bool HasError = false;
820  nextToken();
821
822  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
823  // replace this by using parseAssigmentExpression() inside.
824  do {
825    // FIXME: When we start to support lambdas, we'll want to parse them away
826    // here, otherwise our bail-out scenarios below break. The better solution
827    // might be to just implement a more or less complete expression parser.
828    switch (FormatTok->Tok.getKind()) {
829    case tok::caret:
830      nextToken();
831      if (FormatTok->is(tok::l_brace)) {
832        parseChildBlock();
833      }
834      break;
835    case tok::l_square:
836      tryToParseLambda();
837      break;
838    case tok::l_brace:
839      // Assume there are no blocks inside a braced init list apart
840      // from the ones we explicitly parse out (like lambdas).
841      FormatTok->BlockKind = BK_BracedInit;
842      parseBracedList();
843      break;
844    case tok::r_brace:
845      nextToken();
846      return !HasError;
847    case tok::semi:
848      HasError = true;
849      if (!ContinueOnSemicolons)
850        return !HasError;
851      nextToken();
852      break;
853    case tok::comma:
854      nextToken();
855      break;
856    default:
857      nextToken();
858      break;
859    }
860  } while (!eof());
861  return false;
862}
863
864void UnwrappedLineParser::parseReturn() {
865  nextToken();
866
867  do {
868    switch (FormatTok->Tok.getKind()) {
869    case tok::l_brace:
870      parseBracedList();
871      if (FormatTok->Tok.isNot(tok::semi)) {
872        // Assume missing ';'.
873        addUnwrappedLine();
874        return;
875      }
876      break;
877    case tok::l_paren:
878      parseParens();
879      break;
880    case tok::r_brace:
881      // Assume missing ';'.
882      addUnwrappedLine();
883      return;
884    case tok::semi:
885      nextToken();
886      addUnwrappedLine();
887      return;
888    case tok::l_square:
889      tryToParseLambda();
890      break;
891    default:
892      nextToken();
893      break;
894    }
895  } while (!eof());
896}
897
898void UnwrappedLineParser::parseParens() {
899  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
900  nextToken();
901  do {
902    switch (FormatTok->Tok.getKind()) {
903    case tok::l_paren:
904      parseParens();
905      break;
906    case tok::r_paren:
907      nextToken();
908      return;
909    case tok::r_brace:
910      // A "}" inside parenthesis is an error if there wasn't a matching "{".
911      return;
912    case tok::l_square:
913      tryToParseLambda();
914      break;
915    case tok::l_brace: {
916      if (!tryToParseBracedList()) {
917        parseChildBlock();
918      }
919      break;
920    }
921    case tok::at:
922      nextToken();
923      if (FormatTok->Tok.is(tok::l_brace))
924        parseBracedList();
925      break;
926    default:
927      nextToken();
928      break;
929    }
930  } while (!eof());
931}
932
933void UnwrappedLineParser::parseIfThenElse() {
934  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
935  nextToken();
936  if (FormatTok->Tok.is(tok::l_paren))
937    parseParens();
938  bool NeedsUnwrappedLine = false;
939  if (FormatTok->Tok.is(tok::l_brace)) {
940    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
941      addUnwrappedLine();
942    parseBlock(/*MustBeDeclaration=*/false);
943    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
944      addUnwrappedLine();
945    else
946      NeedsUnwrappedLine = true;
947  } else {
948    addUnwrappedLine();
949    ++Line->Level;
950    parseStructuralElement();
951    --Line->Level;
952  }
953  if (FormatTok->Tok.is(tok::kw_else)) {
954    nextToken();
955    if (FormatTok->Tok.is(tok::l_brace)) {
956      if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
957        addUnwrappedLine();
958      parseBlock(/*MustBeDeclaration=*/false);
959      addUnwrappedLine();
960    } else if (FormatTok->Tok.is(tok::kw_if)) {
961      parseIfThenElse();
962    } else {
963      addUnwrappedLine();
964      ++Line->Level;
965      parseStructuralElement();
966      --Line->Level;
967    }
968  } else if (NeedsUnwrappedLine) {
969    addUnwrappedLine();
970  }
971}
972
973void UnwrappedLineParser::parseNamespace() {
974  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
975  nextToken();
976  if (FormatTok->Tok.is(tok::identifier))
977    nextToken();
978  if (FormatTok->Tok.is(tok::l_brace)) {
979    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
980        Style.BreakBeforeBraces == FormatStyle::BS_Allman)
981      addUnwrappedLine();
982
983    bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
984                    (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
985                     DeclarationScopeStack.size() > 1);
986    parseBlock(/*MustBeDeclaration=*/true, AddLevel);
987    // Munch the semicolon after a namespace. This is more common than one would
988    // think. Puttin the semicolon into its own line is very ugly.
989    if (FormatTok->Tok.is(tok::semi))
990      nextToken();
991    addUnwrappedLine();
992  }
993  // FIXME: Add error handling.
994}
995
996void UnwrappedLineParser::parseForOrWhileLoop() {
997  assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
998         "'for' or 'while' expected");
999  nextToken();
1000  if (FormatTok->Tok.is(tok::l_paren))
1001    parseParens();
1002  if (FormatTok->Tok.is(tok::l_brace)) {
1003    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1004      addUnwrappedLine();
1005    parseBlock(/*MustBeDeclaration=*/false);
1006    addUnwrappedLine();
1007  } else {
1008    addUnwrappedLine();
1009    ++Line->Level;
1010    parseStructuralElement();
1011    --Line->Level;
1012  }
1013}
1014
1015void UnwrappedLineParser::parseDoWhile() {
1016  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1017  nextToken();
1018  if (FormatTok->Tok.is(tok::l_brace)) {
1019    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1020      addUnwrappedLine();
1021    parseBlock(/*MustBeDeclaration=*/false);
1022  } else {
1023    addUnwrappedLine();
1024    ++Line->Level;
1025    parseStructuralElement();
1026    --Line->Level;
1027  }
1028
1029  // FIXME: Add error handling.
1030  if (!FormatTok->Tok.is(tok::kw_while)) {
1031    addUnwrappedLine();
1032    return;
1033  }
1034
1035  nextToken();
1036  parseStructuralElement();
1037}
1038
1039void UnwrappedLineParser::parseLabel() {
1040  nextToken();
1041  unsigned OldLineLevel = Line->Level;
1042  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1043    --Line->Level;
1044  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1045    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1046      addUnwrappedLine();
1047    parseBlock(/*MustBeDeclaration=*/false);
1048    if (FormatTok->Tok.is(tok::kw_break)) {
1049      // "break;" after "}" on its own line only for BS_Allman
1050      if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1051        addUnwrappedLine();
1052      parseStructuralElement();
1053    }
1054  }
1055  addUnwrappedLine();
1056  Line->Level = OldLineLevel;
1057}
1058
1059void UnwrappedLineParser::parseCaseLabel() {
1060  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1061  // FIXME: fix handling of complex expressions here.
1062  do {
1063    nextToken();
1064  } while (!eof() && !FormatTok->Tok.is(tok::colon));
1065  parseLabel();
1066}
1067
1068void UnwrappedLineParser::parseSwitch() {
1069  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1070  nextToken();
1071  if (FormatTok->Tok.is(tok::l_paren))
1072    parseParens();
1073  if (FormatTok->Tok.is(tok::l_brace)) {
1074    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1075      addUnwrappedLine();
1076    parseBlock(/*MustBeDeclaration=*/false);
1077    addUnwrappedLine();
1078  } else {
1079    addUnwrappedLine();
1080    ++Line->Level;
1081    parseStructuralElement();
1082    --Line->Level;
1083  }
1084}
1085
1086void UnwrappedLineParser::parseAccessSpecifier() {
1087  nextToken();
1088  // Otherwise, we don't know what it is, and we'd better keep the next token.
1089  if (FormatTok->Tok.is(tok::colon))
1090    nextToken();
1091  addUnwrappedLine();
1092}
1093
1094void UnwrappedLineParser::parseEnum() {
1095  nextToken();
1096  // Eat up enum class ...
1097  if (FormatTok->Tok.is(tok::kw_class) ||
1098      FormatTok->Tok.is(tok::kw_struct))
1099      nextToken();
1100  while (FormatTok->Tok.getIdentifierInfo() ||
1101         FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
1102    nextToken();
1103    // We can have macros or attributes in between 'enum' and the enum name.
1104    if (FormatTok->Tok.is(tok::l_paren)) {
1105      parseParens();
1106    }
1107    if (FormatTok->Tok.is(tok::identifier))
1108      nextToken();
1109  }
1110  if (FormatTok->Tok.is(tok::l_brace)) {
1111    FormatTok->BlockKind = BK_Block;
1112    bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1113    if (HasError) {
1114      if (FormatTok->is(tok::semi))
1115        nextToken();
1116      addUnwrappedLine();
1117    }
1118  }
1119  // We fall through to parsing a structural element afterwards, so that in
1120  // enum A {} n, m;
1121  // "} n, m;" will end up in one unwrapped line.
1122}
1123
1124void UnwrappedLineParser::parseRecord() {
1125  nextToken();
1126  if (FormatTok->Tok.is(tok::identifier) ||
1127      FormatTok->Tok.is(tok::kw___attribute) ||
1128      FormatTok->Tok.is(tok::kw___declspec) ||
1129      FormatTok->Tok.is(tok::kw_alignas)) {
1130    nextToken();
1131    // We can have macros or attributes in between 'class' and the class name.
1132    if (FormatTok->Tok.is(tok::l_paren)) {
1133      parseParens();
1134    }
1135    // The actual identifier can be a nested name specifier, and in macros
1136    // it is often token-pasted.
1137    while (FormatTok->Tok.is(tok::identifier) ||
1138           FormatTok->Tok.is(tok::coloncolon) ||
1139           FormatTok->Tok.is(tok::hashhash))
1140      nextToken();
1141
1142    // Note that parsing away template declarations here leads to incorrectly
1143    // accepting function declarations as record declarations.
1144    // In general, we cannot solve this problem. Consider:
1145    // class A<int> B() {}
1146    // which can be a function definition or a class definition when B() is a
1147    // macro. If we find enough real-world cases where this is a problem, we
1148    // can parse for the 'template' keyword in the beginning of the statement,
1149    // and thus rule out the record production in case there is no template
1150    // (this would still leave us with an ambiguity between template function
1151    // and class declarations).
1152    if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1153      while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1154        if (FormatTok->Tok.is(tok::semi))
1155          return;
1156        nextToken();
1157      }
1158    }
1159  }
1160  if (FormatTok->Tok.is(tok::l_brace)) {
1161    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1162        Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1163      addUnwrappedLine();
1164
1165    parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true,
1166               /*MunchSemi=*/false);
1167  }
1168  // We fall through to parsing a structural element afterwards, so
1169  // class A {} n, m;
1170  // will end up in one unwrapped line.
1171}
1172
1173void UnwrappedLineParser::parseObjCProtocolList() {
1174  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1175  do
1176    nextToken();
1177  while (!eof() && FormatTok->Tok.isNot(tok::greater));
1178  nextToken(); // Skip '>'.
1179}
1180
1181void UnwrappedLineParser::parseObjCUntilAtEnd() {
1182  do {
1183    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1184      nextToken();
1185      addUnwrappedLine();
1186      break;
1187    }
1188    if (FormatTok->is(tok::l_brace)) {
1189      parseBlock(/*MustBeDeclaration=*/false);
1190      // In ObjC interfaces, nothing should be following the "}".
1191      addUnwrappedLine();
1192    } else {
1193      parseStructuralElement();
1194    }
1195  } while (!eof());
1196}
1197
1198void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1199  nextToken();
1200  nextToken(); // interface name
1201
1202  // @interface can be followed by either a base class, or a category.
1203  if (FormatTok->Tok.is(tok::colon)) {
1204    nextToken();
1205    nextToken(); // base class name
1206  } else if (FormatTok->Tok.is(tok::l_paren))
1207    // Skip category, if present.
1208    parseParens();
1209
1210  if (FormatTok->Tok.is(tok::less))
1211    parseObjCProtocolList();
1212
1213  // If instance variables are present, keep the '{' on the first line too.
1214  if (FormatTok->Tok.is(tok::l_brace))
1215    parseBlock(/*MustBeDeclaration=*/true);
1216
1217  // With instance variables, this puts '}' on its own line.  Without instance
1218  // variables, this ends the @interface line.
1219  addUnwrappedLine();
1220
1221  parseObjCUntilAtEnd();
1222}
1223
1224void UnwrappedLineParser::parseObjCProtocol() {
1225  nextToken();
1226  nextToken(); // protocol name
1227
1228  if (FormatTok->Tok.is(tok::less))
1229    parseObjCProtocolList();
1230
1231  // Check for protocol declaration.
1232  if (FormatTok->Tok.is(tok::semi)) {
1233    nextToken();
1234    return addUnwrappedLine();
1235  }
1236
1237  addUnwrappedLine();
1238  parseObjCUntilAtEnd();
1239}
1240
1241LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1242                                                 StringRef Prefix = "") {
1243  llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1244               << (Line.InPPDirective ? " MACRO" : "") << ": ";
1245  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1246                                                    E = Line.Tokens.end();
1247       I != E; ++I) {
1248    llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1249  }
1250  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1251                                                    E = Line.Tokens.end();
1252       I != E; ++I) {
1253    const UnwrappedLineNode &Node = *I;
1254    for (SmallVectorImpl<UnwrappedLine>::const_iterator
1255             I = Node.Children.begin(),
1256             E = Node.Children.end();
1257         I != E; ++I) {
1258      printDebugInfo(*I, "\nChild: ");
1259    }
1260  }
1261  llvm::dbgs() << "\n";
1262}
1263
1264void UnwrappedLineParser::addUnwrappedLine() {
1265  if (Line->Tokens.empty())
1266    return;
1267  DEBUG({
1268    if (CurrentLines == &Lines)
1269      printDebugInfo(*Line);
1270  });
1271  CurrentLines->push_back(*Line);
1272  Line->Tokens.clear();
1273  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1274    for (SmallVectorImpl<UnwrappedLine>::iterator
1275             I = PreprocessorDirectives.begin(),
1276             E = PreprocessorDirectives.end();
1277         I != E; ++I) {
1278      CurrentLines->push_back(*I);
1279    }
1280    PreprocessorDirectives.clear();
1281  }
1282}
1283
1284bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1285
1286void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1287  bool JustComments = Line->Tokens.empty();
1288  for (SmallVectorImpl<FormatToken *>::const_iterator
1289           I = CommentsBeforeNextToken.begin(),
1290           E = CommentsBeforeNextToken.end();
1291       I != E; ++I) {
1292    if ((*I)->NewlinesBefore && JustComments) {
1293      addUnwrappedLine();
1294    }
1295    pushToken(*I);
1296  }
1297  if (NewlineBeforeNext && JustComments) {
1298    addUnwrappedLine();
1299  }
1300  CommentsBeforeNextToken.clear();
1301}
1302
1303void UnwrappedLineParser::nextToken() {
1304  if (eof())
1305    return;
1306  flushComments(FormatTok->NewlinesBefore > 0);
1307  pushToken(FormatTok);
1308  readToken();
1309}
1310
1311void UnwrappedLineParser::readToken() {
1312  bool CommentsInCurrentLine = true;
1313  do {
1314    FormatTok = Tokens->getNextToken();
1315    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1316           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1317      // If there is an unfinished unwrapped line, we flush the preprocessor
1318      // directives only after that unwrapped line was finished later.
1319      bool SwitchToPreprocessorLines =
1320          !Line->Tokens.empty() && CurrentLines == &Lines;
1321      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1322      // Comments stored before the preprocessor directive need to be output
1323      // before the preprocessor directive, at the same level as the
1324      // preprocessor directive, as we consider them to apply to the directive.
1325      flushComments(FormatTok->NewlinesBefore > 0);
1326      parsePPDirective();
1327    }
1328
1329    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1330        !Line->InPPDirective) {
1331      continue;
1332    }
1333
1334    if (!FormatTok->Tok.is(tok::comment))
1335      return;
1336    if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1337      CommentsInCurrentLine = false;
1338    }
1339    if (CommentsInCurrentLine) {
1340      pushToken(FormatTok);
1341    } else {
1342      CommentsBeforeNextToken.push_back(FormatTok);
1343    }
1344  } while (!eof());
1345}
1346
1347void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1348  Line->Tokens.push_back(UnwrappedLineNode(Tok));
1349  if (MustBreakBeforeNextToken) {
1350    Line->Tokens.back().Tok->MustBreakBefore = true;
1351    MustBreakBeforeNextToken = false;
1352  }
1353}
1354
1355} // end namespace format
1356} // end namespace clang
1357