// UnwrappedLineParser.h revision 251662
1//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the declaration of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14//===----------------------------------------------------------------------===// 15 16#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 17#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 18 19#include "clang/Basic/IdentifierTable.h" 20#include "clang/Basic/SourceManager.h" 21#include "clang/Format/Format.h" 22#include "clang/Lex/Lexer.h" 23#include <list> 24 25namespace clang { 26 27class DiagnosticsEngine; 28 29namespace format { 30 31/// \brief A wrapper around a \c Token storing information about the 32/// whitespace characters preceeding it. 33struct FormatToken { 34 FormatToken() 35 : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), 36 LastNewlineOffset(0), TokenLength(0), IsFirst(false), 37 MustBreakBefore(false), TrailingWhiteSpaceLength(0) {} 38 39 /// \brief The \c Token. 40 Token Tok; 41 42 /// \brief The number of newlines immediately before the \c Token. 43 /// 44 /// This can be used to determine what the user wrote in the original code 45 /// and thereby e.g. leave an empty line between two function definitions. 46 unsigned NewlinesBefore; 47 48 /// \brief Whether there is at least one unescaped newline before the \c 49 /// Token. 50 bool HasUnescapedNewline; 51 52 /// \brief The location of the start of the whitespace immediately preceeding 53 /// the \c Token. 54 /// 55 /// Used together with \c WhiteSpaceLength to create a \c Replacement. 
56 SourceLocation WhiteSpaceStart; 57 58 /// \brief The length in characters of the whitespace immediately preceeding 59 /// the \c Token. 60 unsigned WhiteSpaceLength; 61 62 /// \brief The offset just past the last '\n' in this token's leading 63 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 64 unsigned LastNewlineOffset; 65 66 /// \brief The length of the non-whitespace parts of the token. This is 67 /// necessary because we need to handle escaped newlines that are stored 68 /// with the token. 69 unsigned TokenLength; 70 71 /// \brief Indicates that this is the first token. 72 bool IsFirst; 73 74 /// \brief Whether there must be a line break before this token. 75 /// 76 /// This happens for example when a preprocessor directive ended directly 77 /// before the token. 78 bool MustBreakBefore; 79 80 /// \brief Number of characters of trailing whitespace. 81 unsigned TrailingWhiteSpaceLength; 82 83 /// \brief Returns actual token start location without leading escaped 84 /// newlines and whitespace. 85 /// 86 /// This can be different to Tok.getLocation(), which includes leading escaped 87 /// newlines. 88 SourceLocation getStartOfNonWhitespace() const { 89 return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength); 90 } 91}; 92 93/// \brief An unwrapped line is a sequence of \c Token, that we would like to 94/// put on a single line if there was no column limit. 95/// 96/// This is used as a main interface between the \c UnwrappedLineParser and the 97/// \c UnwrappedLineFormatter. The key property is that changing the formatting 98/// within an unwrapped line does not affect any other unwrapped lines. 99struct UnwrappedLine { 100 UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) { 101 } 102 103 // FIXME: Don't use std::list here. 104 /// \brief The \c Tokens comprising this \c UnwrappedLine. 105 std::list<FormatToken> Tokens; 106 107 /// \brief The indent level of the \c UnwrappedLine. 
108 unsigned Level; 109 110 /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. 111 bool InPPDirective; 112 113 bool MustBeDeclaration; 114}; 115 116class UnwrappedLineConsumer { 117public: 118 virtual ~UnwrappedLineConsumer() { 119 } 120 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 121}; 122 123class FormatTokenSource { 124public: 125 virtual ~FormatTokenSource() { 126 } 127 virtual FormatToken getNextToken() = 0; 128}; 129 130class UnwrappedLineParser { 131public: 132 UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 133 FormatTokenSource &Tokens, 134 UnwrappedLineConsumer &Callback); 135 136 /// Returns true in case of a structural error. 137 bool parse(); 138 139private: 140 void parseFile(); 141 void parseLevel(bool HasOpeningBrace); 142 void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1); 143 void parsePPDirective(); 144 void parsePPDefine(); 145 void parsePPUnknown(); 146 void parseStructuralElement(); 147 void parseBracedList(); 148 void parseReturn(); 149 void parseParens(); 150 void parseIfThenElse(); 151 void parseForOrWhileLoop(); 152 void parseDoWhile(); 153 void parseLabel(); 154 void parseCaseLabel(); 155 void parseSwitch(); 156 void parseNamespace(); 157 void parseAccessSpecifier(); 158 void parseEnum(); 159 void parseRecord(); 160 void parseObjCProtocolList(); 161 void parseObjCUntilAtEnd(); 162 void parseObjCInterfaceOrImplementation(); 163 void parseObjCProtocol(); 164 void addUnwrappedLine(); 165 bool eof() const; 166 void nextToken(); 167 void readToken(); 168 void flushComments(bool NewlineBeforeNext); 169 void pushToken(const FormatToken &Tok); 170 171 // FIXME: We are constantly running into bugs where Line.Level is incorrectly 172 // subtracted from beyond 0. Introduce a method to subtract from Line.Level 173 // and use that everywhere in the Parser. 
174 OwningPtr<UnwrappedLine> Line; 175 176 // Comments are sorted into unwrapped lines by whether they are in the same 177 // line as the previous token, or not. If not, they belong to the next token. 178 // Since the next token might already be in a new unwrapped line, we need to 179 // store the comments belonging to that token. 180 SmallVector<FormatToken, 1> CommentsBeforeNextToken; 181 FormatToken FormatTok; 182 bool MustBreakBeforeNextToken; 183 184 // The parsed lines. Only added to through \c CurrentLines. 185 std::vector<UnwrappedLine> Lines; 186 187 // Preprocessor directives are parsed out-of-order from other unwrapped lines. 188 // Thus, we need to keep a list of preprocessor directives to be reported 189 // after an unwarpped line that has been started was finished. 190 std::vector<UnwrappedLine> PreprocessorDirectives; 191 192 // New unwrapped lines are added via CurrentLines. 193 // Usually points to \c &Lines. While parsing a preprocessor directive when 194 // there is an unfinished previous unwrapped line, will point to 195 // \c &PreprocessorDirectives. 196 std::vector<UnwrappedLine> *CurrentLines; 197 198 // We store for each line whether it must be a declaration depending on 199 // whether we are in a compound statement or not. 200 std::vector<bool> DeclarationScopeStack; 201 202 // Will be true if we encounter an error that leads to possibily incorrect 203 // indentation levels. 204 bool StructuralError; 205 206 clang::DiagnosticsEngine &Diag; 207 const FormatStyle &Style; 208 FormatTokenSource *Tokens; 209 UnwrappedLineConsumer &Callback; 210 211 friend class ScopedLineState; 212}; 213 214} // end namespace format 215} // end namespace clang 216 217#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 218