1249261Sdim//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
2249261Sdim//
3249261Sdim//                     The LLVM Compiler Infrastructure
4249261Sdim//
5249261Sdim// This file is distributed under the University of Illinois Open Source
6249261Sdim// License. See LICENSE.TXT for details.
7249261Sdim//
8249261Sdim//===----------------------------------------------------------------------===//
9249261Sdim///
10249261Sdim/// \file
11249261Sdim/// \brief This file contains the declaration of the UnwrappedLineParser,
12249261Sdim/// which turns a stream of tokens into UnwrappedLines.
13249261Sdim///
14249261Sdim//===----------------------------------------------------------------------===//
15249261Sdim
16249261Sdim#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
17249261Sdim#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
18249261Sdim
#include "FormatToken.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include <list>
#include <stack>
#include <vector>
23249261Sdim
24249261Sdimnamespace clang {
25249261Sdimnamespace format {
26249261Sdim
27263508Sdimstruct UnwrappedLineNode;
28249261Sdim
29249261Sdim/// \brief An unwrapped line is a sequence of \c Token, that we would like to
30249261Sdim/// put on a single line if there was no column limit.
31249261Sdim///
32249261Sdim/// This is used as a main interface between the \c UnwrappedLineParser and the
33249261Sdim/// \c UnwrappedLineFormatter. The key property is that changing the formatting
34249261Sdim/// within an unwrapped line does not affect any other unwrapped lines.
35249261Sdimstruct UnwrappedLine {
36263508Sdim  UnwrappedLine();
37249261Sdim
38249261Sdim  // FIXME: Don't use std::list here.
39249261Sdim  /// \brief The \c Tokens comprising this \c UnwrappedLine.
40263508Sdim  std::list<UnwrappedLineNode> Tokens;
41249261Sdim
42249261Sdim  /// \brief The indent level of the \c UnwrappedLine.
43249261Sdim  unsigned Level;
44249261Sdim
45249261Sdim  /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
46249261Sdim  bool InPPDirective;
47249261Sdim
48249261Sdim  bool MustBeDeclaration;
49249261Sdim};
50249261Sdim
51249261Sdimclass UnwrappedLineConsumer {
52249261Sdimpublic:
53263508Sdim  virtual ~UnwrappedLineConsumer() {}
54249261Sdim  virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
55263508Sdim  virtual void finishRun() = 0;
56249261Sdim};
57249261Sdim
58263508Sdimclass FormatTokenSource;
59249261Sdim
60249261Sdimclass UnwrappedLineParser {
61249261Sdimpublic:
62263508Sdim  UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens,
63249261Sdim                      UnwrappedLineConsumer &Callback);
64249261Sdim
65249261Sdim  /// Returns true in case of a structural error.
66249261Sdim  bool parse();
67249261Sdim
68249261Sdimprivate:
69263508Sdim  void reset();
70251662Sdim  void parseFile();
71251662Sdim  void parseLevel(bool HasOpeningBrace);
72263508Sdim  void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
73263508Sdim                  bool MunchSemi = true);
74263508Sdim  void parseChildBlock();
75249261Sdim  void parsePPDirective();
76249261Sdim  void parsePPDefine();
77263508Sdim  void parsePPIf(bool IfDef);
78263508Sdim  void parsePPElIf();
79263508Sdim  void parsePPElse();
80263508Sdim  void parsePPEndIf();
81249261Sdim  void parsePPUnknown();
82249261Sdim  void parseStructuralElement();
83263508Sdim  bool tryToParseBracedList();
84263508Sdim  bool parseBracedList(bool ContinueOnSemicolons = false);
85249261Sdim  void parseReturn();
86249261Sdim  void parseParens();
87249261Sdim  void parseIfThenElse();
88249261Sdim  void parseForOrWhileLoop();
89249261Sdim  void parseDoWhile();
90249261Sdim  void parseLabel();
91249261Sdim  void parseCaseLabel();
92249261Sdim  void parseSwitch();
93249261Sdim  void parseNamespace();
94249261Sdim  void parseAccessSpecifier();
95249261Sdim  void parseEnum();
96249261Sdim  void parseRecord();
97249261Sdim  void parseObjCProtocolList();
98249261Sdim  void parseObjCUntilAtEnd();
99249261Sdim  void parseObjCInterfaceOrImplementation();
100249261Sdim  void parseObjCProtocol();
101263508Sdim  void tryToParseLambda();
102263508Sdim  bool tryToParseLambdaIntroducer();
103249261Sdim  void addUnwrappedLine();
104249261Sdim  bool eof() const;
105249261Sdim  void nextToken();
106249261Sdim  void readToken();
107249261Sdim  void flushComments(bool NewlineBeforeNext);
108263508Sdim  void pushToken(FormatToken *Tok);
109263508Sdim  void calculateBraceTypes();
110263508Sdim  void pushPPConditional();
111249261Sdim
112249261Sdim  // FIXME: We are constantly running into bugs where Line.Level is incorrectly
113249261Sdim  // subtracted from beyond 0. Introduce a method to subtract from Line.Level
114249261Sdim  // and use that everywhere in the Parser.
115249261Sdim  OwningPtr<UnwrappedLine> Line;
116249261Sdim
117249261Sdim  // Comments are sorted into unwrapped lines by whether they are in the same
118249261Sdim  // line as the previous token, or not. If not, they belong to the next token.
119249261Sdim  // Since the next token might already be in a new unwrapped line, we need to
120249261Sdim  // store the comments belonging to that token.
121263508Sdim  SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
122263508Sdim  FormatToken *FormatTok;
123249261Sdim  bool MustBreakBeforeNextToken;
124249261Sdim
125249261Sdim  // The parsed lines. Only added to through \c CurrentLines.
126263508Sdim  SmallVector<UnwrappedLine, 8> Lines;
127249261Sdim
128249261Sdim  // Preprocessor directives are parsed out-of-order from other unwrapped lines.
129249261Sdim  // Thus, we need to keep a list of preprocessor directives to be reported
130249261Sdim  // after an unwarpped line that has been started was finished.
131263508Sdim  SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
132249261Sdim
133249261Sdim  // New unwrapped lines are added via CurrentLines.
134249261Sdim  // Usually points to \c &Lines. While parsing a preprocessor directive when
135249261Sdim  // there is an unfinished previous unwrapped line, will point to
136249261Sdim  // \c &PreprocessorDirectives.
137263508Sdim  SmallVectorImpl<UnwrappedLine> *CurrentLines;
138249261Sdim
139249261Sdim  // We store for each line whether it must be a declaration depending on
140249261Sdim  // whether we are in a compound statement or not.
141249261Sdim  std::vector<bool> DeclarationScopeStack;
142249261Sdim
143251662Sdim  // Will be true if we encounter an error that leads to possibily incorrect
144251662Sdim  // indentation levels.
145251662Sdim  bool StructuralError;
146251662Sdim
147249261Sdim  const FormatStyle &Style;
148249261Sdim  FormatTokenSource *Tokens;
149249261Sdim  UnwrappedLineConsumer &Callback;
150249261Sdim
151263508Sdim  // FIXME: This is a temporary measure until we have reworked the ownership
152263508Sdim  // of the format tokens. The goal is to have the actual tokens created and
153263508Sdim  // owned outside of and handed into the UnwrappedLineParser.
154263508Sdim  ArrayRef<FormatToken *> AllTokens;
155263508Sdim
156263508Sdim  // Represents preprocessor branch type, so we can find matching
157263508Sdim  // #if/#else/#endif directives.
158263508Sdim  enum PPBranchKind {
159263508Sdim    PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
160263508Sdim    PP_Unreachable  // #if 0 or a conditional preprocessor block inside #if 0
161263508Sdim  };
162263508Sdim
163263508Sdim  // Keeps a stack of currently active preprocessor branching directives.
164263508Sdim  SmallVector<PPBranchKind, 16> PPStack;
165263508Sdim
166263508Sdim  // The \c UnwrappedLineParser re-parses the code for each combination
167263508Sdim  // of preprocessor branches that can be taken.
168263508Sdim  // To that end, we take the same branch (#if, #else, or one of the #elif
169263508Sdim  // branches) for each nesting level of preprocessor branches.
170263508Sdim  // \c PPBranchLevel stores the current nesting level of preprocessor
171263508Sdim  // branches during one pass over the code.
172263508Sdim  int PPBranchLevel;
173263508Sdim
174263508Sdim  // Contains the current branch (#if, #else or one of the #elif branches)
175263508Sdim  // for each nesting level.
176263508Sdim  SmallVector<int, 8> PPLevelBranchIndex;
177263508Sdim
178263508Sdim  // Contains the maximum number of branches at each nesting level.
179263508Sdim  SmallVector<int, 8> PPLevelBranchCount;
180263508Sdim
181263508Sdim  // Contains the number of branches per nesting level we are currently
182263508Sdim  // in while parsing a preprocessor branch sequence.
183263508Sdim  // This is used to update PPLevelBranchCount at the end of a branch
184263508Sdim  // sequence.
185263508Sdim  std::stack<int> PPChainBranchIndex;
186263508Sdim
187249261Sdim  friend class ScopedLineState;
188249261Sdim};
189249261Sdim
190263508Sdimstruct UnwrappedLineNode {
191263508Sdim  UnwrappedLineNode() : Tok(NULL) {}
192263508Sdim  UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
193263508Sdim
194263508Sdim  FormatToken *Tok;
195263508Sdim  SmallVector<UnwrappedLine, 0> Children;
196263508Sdim};
197263508Sdim
198263508Sdiminline UnwrappedLine::UnwrappedLine()
199263508Sdim    : Level(0), InPPDirective(false), MustBeDeclaration(false) {}
200263508Sdim
201249261Sdim} // end namespace format
202249261Sdim} // end namespace clang
203249261Sdim
204249261Sdim#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
205