1249261Sdim//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 2249261Sdim// 3249261Sdim// The LLVM Compiler Infrastructure 4249261Sdim// 5249261Sdim// This file is distributed under the University of Illinois Open Source 6249261Sdim// License. See LICENSE.TXT for details. 7249261Sdim// 8249261Sdim//===----------------------------------------------------------------------===// 9249261Sdim/// 10249261Sdim/// \file 11249261Sdim/// \brief This file contains the declaration of the UnwrappedLineParser, 12249261Sdim/// which turns a stream of tokens into UnwrappedLines. 13249261Sdim/// 14249261Sdim//===----------------------------------------------------------------------===// 15249261Sdim 16249261Sdim#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 17249261Sdim#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 18249261Sdim 19249261Sdim#include "clang/Basic/IdentifierTable.h" 20249261Sdim#include "clang/Format/Format.h" 21263508Sdim#include "FormatToken.h" 22249261Sdim#include <list> 23249261Sdim 24249261Sdimnamespace clang { 25249261Sdimnamespace format { 26249261Sdim 27263508Sdimstruct UnwrappedLineNode; 28249261Sdim 29249261Sdim/// \brief An unwrapped line is a sequence of \c Token, that we would like to 30249261Sdim/// put on a single line if there was no column limit. 31249261Sdim/// 32249261Sdim/// This is used as a main interface between the \c UnwrappedLineParser and the 33249261Sdim/// \c UnwrappedLineFormatter. The key property is that changing the formatting 34249261Sdim/// within an unwrapped line does not affect any other unwrapped lines. 35249261Sdimstruct UnwrappedLine { 36263508Sdim UnwrappedLine(); 37249261Sdim 38249261Sdim // FIXME: Don't use std::list here. 39249261Sdim /// \brief The \c Tokens comprising this \c UnwrappedLine. 40263508Sdim std::list<UnwrappedLineNode> Tokens; 41249261Sdim 42249261Sdim /// \brief The indent level of the \c UnwrappedLine. 43249261Sdim unsigned Level; 44249261Sdim 45249261Sdim /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. 46249261Sdim bool InPPDirective; 47249261Sdim 48249261Sdim bool MustBeDeclaration; 49249261Sdim}; 50249261Sdim 51249261Sdimclass UnwrappedLineConsumer { 52249261Sdimpublic: 53263508Sdim virtual ~UnwrappedLineConsumer() {} 54249261Sdim virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 55263508Sdim virtual void finishRun() = 0; 56249261Sdim}; 57249261Sdim 58263508Sdimclass FormatTokenSource; 59249261Sdim 60249261Sdimclass UnwrappedLineParser { 61249261Sdimpublic: 62263508Sdim UnwrappedLineParser(const FormatStyle &Style, ArrayRef<FormatToken *> Tokens, 63249261Sdim UnwrappedLineConsumer &Callback); 64249261Sdim 65249261Sdim /// Returns true in case of a structural error. 66249261Sdim bool parse(); 67249261Sdim 68249261Sdimprivate: 69263508Sdim void reset(); 70251662Sdim void parseFile(); 71251662Sdim void parseLevel(bool HasOpeningBrace); 72263508Sdim void parseBlock(bool MustBeDeclaration, bool AddLevel = true, 73263508Sdim bool MunchSemi = true); 74263508Sdim void parseChildBlock(); 75249261Sdim void parsePPDirective(); 76249261Sdim void parsePPDefine(); 77263508Sdim void parsePPIf(bool IfDef); 78263508Sdim void parsePPElIf(); 79263508Sdim void parsePPElse(); 80263508Sdim void parsePPEndIf(); 81249261Sdim void parsePPUnknown(); 82249261Sdim void parseStructuralElement(); 83263508Sdim bool tryToParseBracedList(); 84263508Sdim bool parseBracedList(bool ContinueOnSemicolons = false); 85249261Sdim void parseReturn(); 86249261Sdim void parseParens(); 87249261Sdim void parseIfThenElse(); 88249261Sdim void parseForOrWhileLoop(); 89249261Sdim void parseDoWhile(); 90249261Sdim void parseLabel(); 91249261Sdim void parseCaseLabel(); 92249261Sdim void parseSwitch(); 93249261Sdim void parseNamespace(); 94249261Sdim void parseAccessSpecifier(); 95249261Sdim void parseEnum(); 96249261Sdim void parseRecord(); 97249261Sdim void parseObjCProtocolList(); 98249261Sdim void parseObjCUntilAtEnd(); 99249261Sdim void parseObjCInterfaceOrImplementation(); 100249261Sdim void parseObjCProtocol(); 101263508Sdim void tryToParseLambda(); 102263508Sdim bool tryToParseLambdaIntroducer(); 103249261Sdim void addUnwrappedLine(); 104249261Sdim bool eof() const; 105249261Sdim void nextToken(); 106249261Sdim void readToken(); 107249261Sdim void flushComments(bool NewlineBeforeNext); 108263508Sdim void pushToken(FormatToken *Tok); 109263508Sdim void calculateBraceTypes(); 110263508Sdim void pushPPConditional(); 111249261Sdim 112249261Sdim // FIXME: We are constantly running into bugs where Line.Level is incorrectly 113249261Sdim // subtracted from beyond 0. Introduce a method to subtract from Line.Level 114249261Sdim // and use that everywhere in the Parser. 115249261Sdim OwningPtr<UnwrappedLine> Line; 116249261Sdim 117249261Sdim // Comments are sorted into unwrapped lines by whether they are in the same 118249261Sdim // line as the previous token, or not. If not, they belong to the next token. 119249261Sdim // Since the next token might already be in a new unwrapped line, we need to 120249261Sdim // store the comments belonging to that token. 121263508Sdim SmallVector<FormatToken *, 1> CommentsBeforeNextToken; 122263508Sdim FormatToken *FormatTok; 123249261Sdim bool MustBreakBeforeNextToken; 124249261Sdim 125249261Sdim // The parsed lines. Only added to through \c CurrentLines. 126263508Sdim SmallVector<UnwrappedLine, 8> Lines; 127249261Sdim 128249261Sdim // Preprocessor directives are parsed out-of-order from other unwrapped lines. 129249261Sdim // Thus, we need to keep a list of preprocessor directives to be reported 130249261Sdim // after an unwarpped line that has been started was finished. 131263508Sdim SmallVector<UnwrappedLine, 4> PreprocessorDirectives; 132249261Sdim 133249261Sdim // New unwrapped lines are added via CurrentLines. 134249261Sdim // Usually points to \c &Lines. While parsing a preprocessor directive when 135249261Sdim // there is an unfinished previous unwrapped line, will point to 136249261Sdim // \c &PreprocessorDirectives. 137263508Sdim SmallVectorImpl<UnwrappedLine> *CurrentLines; 138249261Sdim 139249261Sdim // We store for each line whether it must be a declaration depending on 140249261Sdim // whether we are in a compound statement or not. 141249261Sdim std::vector<bool> DeclarationScopeStack; 142249261Sdim 143251662Sdim // Will be true if we encounter an error that leads to possibily incorrect 144251662Sdim // indentation levels. 145251662Sdim bool StructuralError; 146251662Sdim 147249261Sdim const FormatStyle &Style; 148249261Sdim FormatTokenSource *Tokens; 149249261Sdim UnwrappedLineConsumer &Callback; 150249261Sdim 151263508Sdim // FIXME: This is a temporary measure until we have reworked the ownership 152263508Sdim // of the format tokens. The goal is to have the actual tokens created and 153263508Sdim // owned outside of and handed into the UnwrappedLineParser. 154263508Sdim ArrayRef<FormatToken *> AllTokens; 155263508Sdim 156263508Sdim // Represents preprocessor branch type, so we can find matching 157263508Sdim // #if/#else/#endif directives. 158263508Sdim enum PPBranchKind { 159263508Sdim PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 160263508Sdim PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 161263508Sdim }; 162263508Sdim 163263508Sdim // Keeps a stack of currently active preprocessor branching directives. 164263508Sdim SmallVector<PPBranchKind, 16> PPStack; 165263508Sdim 166263508Sdim // The \c UnwrappedLineParser re-parses the code for each combination 167263508Sdim // of preprocessor branches that can be taken. 168263508Sdim // To that end, we take the same branch (#if, #else, or one of the #elif 169263508Sdim // branches) for each nesting level of preprocessor branches. 170263508Sdim // \c PPBranchLevel stores the current nesting level of preprocessor 171263508Sdim // branches during one pass over the code. 172263508Sdim int PPBranchLevel; 173263508Sdim 174263508Sdim // Contains the current branch (#if, #else or one of the #elif branches) 175263508Sdim // for each nesting level. 176263508Sdim SmallVector<int, 8> PPLevelBranchIndex; 177263508Sdim 178263508Sdim // Contains the maximum number of branches at each nesting level. 179263508Sdim SmallVector<int, 8> PPLevelBranchCount; 180263508Sdim 181263508Sdim // Contains the number of branches per nesting level we are currently 182263508Sdim // in while parsing a preprocessor branch sequence. 183263508Sdim // This is used to update PPLevelBranchCount at the end of a branch 184263508Sdim // sequence. 185263508Sdim std::stack<int> PPChainBranchIndex; 186263508Sdim 187249261Sdim friend class ScopedLineState; 188249261Sdim}; 189249261Sdim 190263508Sdimstruct UnwrappedLineNode { 191263508Sdim UnwrappedLineNode() : Tok(NULL) {} 192263508Sdim UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} 193263508Sdim 194263508Sdim FormatToken *Tok; 195263508Sdim SmallVector<UnwrappedLine, 0> Children; 196263508Sdim}; 197263508Sdim 198263508Sdiminline UnwrappedLine::UnwrappedLine() 199263508Sdim : Level(0), InPPDirective(false), MustBeDeclaration(false) {} 200263508Sdim 201249261Sdim} // end namespace format 202249261Sdim} // end namespace clang 203249261Sdim 204249261Sdim#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 205