TokenAnnotator.h revision 249261
1249261Sdim//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2249261Sdim// 3249261Sdim// The LLVM Compiler Infrastructure 4249261Sdim// 5249261Sdim// This file is distributed under the University of Illinois Open Source 6249261Sdim// License. See LICENSE.TXT for details. 7249261Sdim// 8249261Sdim//===----------------------------------------------------------------------===// 9249261Sdim/// 10249261Sdim/// \file 11249261Sdim/// \brief This file implements a token annotator, i.e. creates 12249261Sdim/// \c AnnotatedTokens out of \c FormatTokens with required extra information. 13249261Sdim/// 14249261Sdim//===----------------------------------------------------------------------===// 15249261Sdim 16249261Sdim#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 17249261Sdim#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 18249261Sdim 19249261Sdim#include "UnwrappedLineParser.h" 20249261Sdim#include "clang/Basic/OperatorPrecedence.h" 21249261Sdim#include "clang/Format/Format.h" 22249261Sdim#include <string> 23249261Sdim 24249261Sdimnamespace clang { 25249261Sdimclass Lexer; 26249261Sdimclass SourceManager; 27249261Sdim 28249261Sdimnamespace format { 29249261Sdim 30249261Sdimenum TokenType { 31249261Sdim TT_BinaryOperator, 32249261Sdim TT_BlockComment, 33249261Sdim TT_CastRParen, 34249261Sdim TT_ConditionalExpr, 35249261Sdim TT_CtorInitializerColon, 36249261Sdim TT_ImplicitStringLiteral, 37249261Sdim TT_InlineASMColon, 38249261Sdim TT_InheritanceColon, 39249261Sdim TT_LineComment, 40249261Sdim TT_ObjCArrayLiteral, 41249261Sdim TT_ObjCBlockLParen, 42249261Sdim TT_ObjCDecl, 43249261Sdim TT_ObjCForIn, 44249261Sdim TT_ObjCMethodExpr, 45249261Sdim TT_ObjCMethodSpecifier, 46249261Sdim TT_ObjCProperty, 47249261Sdim TT_ObjCSelectorName, 48249261Sdim TT_OverloadedOperatorLParen, 49249261Sdim TT_PointerOrReference, 50249261Sdim TT_PureVirtualSpecifier, 51249261Sdim TT_RangeBasedForLoopColon, 52249261Sdim TT_StartOfName, 53249261Sdim TT_TemplateCloser, 54249261Sdim TT_TemplateOpener, 55249261Sdim TT_TrailingUnaryOperator, 56249261Sdim TT_UnaryOperator, 57249261Sdim TT_Unknown 58249261Sdim}; 59249261Sdim 60249261Sdimenum LineType { 61249261Sdim LT_Invalid, 62249261Sdim LT_Other, 63249261Sdim LT_BuilderTypeCall, 64249261Sdim LT_PreprocessorDirective, 65249261Sdim LT_VirtualFunctionDecl, 66249261Sdim LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 67249261Sdim LT_ObjCMethodDecl, 68249261Sdim LT_ObjCProperty // An @property line. 69249261Sdim}; 70249261Sdim 71249261Sdimclass AnnotatedToken { 72249261Sdimpublic: 73249261Sdim explicit AnnotatedToken(const FormatToken &FormatTok) 74249261Sdim : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0), 75249261Sdim CanBreakBefore(false), MustBreakBefore(false), 76249261Sdim ClosesTemplateDeclaration(false), MatchingParen(NULL), 77249261Sdim ParameterCount(0), BindingStrength(0), SplitPenalty(0), 78249261Sdim LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0), 79249261Sdim FakeRParens(0), LastInChainOfCalls(false), 80249261Sdim PartOfMultiVariableDeclStmt(false) {} 81249261Sdim 82249261Sdim bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); } 83249261Sdim 84249261Sdim bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { 85249261Sdim return is(K1) || is(K2); 86249261Sdim } 87249261Sdim 88249261Sdim bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const { 89249261Sdim return is(K1) || is(K2) || is(K3); 90249261Sdim } 91249261Sdim 92249261Sdim bool isOneOf( 93249261Sdim tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3, 94249261Sdim tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS, 95249261Sdim tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS, 96249261Sdim tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS, 97249261Sdim tok::TokenKind K10 = tok::NUM_TOKENS, 98249261Sdim tok::TokenKind K11 = tok::NUM_TOKENS, 99249261Sdim tok::TokenKind K12 = tok::NUM_TOKENS) const { 100249261Sdim return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 101249261Sdim is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 102249261Sdim } 103249261Sdim 104249261Sdim bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); } 105249261Sdim 106249261Sdim bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 107249261Sdim return FormatTok.Tok.isObjCAtKeyword(Kind); 108249261Sdim } 109249261Sdim 110249261Sdim bool isAccessSpecifier(bool ColonRequired = true) const { 111249261Sdim return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 112249261Sdim (!ColonRequired || 113249261Sdim (!Children.empty() && Children[0].is(tok::colon))); 114249261Sdim } 115249261Sdim 116249261Sdim bool isObjCAccessSpecifier() const { 117249261Sdim return is(tok::at) && !Children.empty() && 118249261Sdim (Children[0].isObjCAtKeyword(tok::objc_public) || 119249261Sdim Children[0].isObjCAtKeyword(tok::objc_protected) || 120249261Sdim Children[0].isObjCAtKeyword(tok::objc_package) || 121249261Sdim Children[0].isObjCAtKeyword(tok::objc_private)); 122249261Sdim } 123249261Sdim 124249261Sdim FormatToken FormatTok; 125249261Sdim 126249261Sdim TokenType Type; 127249261Sdim 128249261Sdim unsigned SpacesRequiredBefore; 129249261Sdim bool CanBreakBefore; 130249261Sdim bool MustBreakBefore; 131249261Sdim 132249261Sdim bool ClosesTemplateDeclaration; 133249261Sdim 134249261Sdim AnnotatedToken *MatchingParen; 135249261Sdim 136249261Sdim /// \brief Number of parameters, if this is "(", "[" or "<". 137249261Sdim /// 138249261Sdim /// This is initialized to 1 as we don't need to distinguish functions with 139249261Sdim /// 0 parameters from functions with 1 parameter. Thus, we can simply count 140249261Sdim /// the number of commas. 141249261Sdim unsigned ParameterCount; 142249261Sdim 143249261Sdim /// \brief The total length of the line up to and including this token. 144249261Sdim unsigned TotalLength; 145249261Sdim 146249261Sdim // FIXME: Come up with a 'cleaner' concept. 147249261Sdim /// \brief The binding strength of a token. This is a combined value of 148249261Sdim /// operator precedence, parenthesis nesting, etc. 149249261Sdim unsigned BindingStrength; 150249261Sdim 151249261Sdim /// \brief Penalty for inserting a line break before this token. 152249261Sdim unsigned SplitPenalty; 153249261Sdim 154249261Sdim /// \brief If this is the first ObjC selector name in an ObjC method 155249261Sdim /// definition or call, this contains the length of the longest name. 156249261Sdim unsigned LongestObjCSelectorName; 157249261Sdim 158249261Sdim std::vector<AnnotatedToken> Children; 159249261Sdim AnnotatedToken *Parent; 160249261Sdim 161249261Sdim /// \brief Insert this many fake ( before this token for correct indentation. 162249261Sdim unsigned FakeLParens; 163249261Sdim /// \brief Insert this many fake ) after this token for correct indentation. 164249261Sdim unsigned FakeRParens; 165249261Sdim 166249261Sdim /// \brief Is this the last "." or "->" in a builder-type call? 167249261Sdim bool LastInChainOfCalls; 168249261Sdim 169249261Sdim /// \brief Is this token part of a \c DeclStmt defining multiple variables? 170249261Sdim /// 171249261Sdim /// Only set if \c Type == \c TT_StartOfName. 172249261Sdim bool PartOfMultiVariableDeclStmt; 173249261Sdim 174249261Sdim const AnnotatedToken *getPreviousNoneComment() const { 175249261Sdim AnnotatedToken *Tok = Parent; 176249261Sdim while (Tok != NULL && Tok->is(tok::comment)) 177249261Sdim Tok = Tok->Parent; 178249261Sdim return Tok; 179249261Sdim } 180249261Sdim}; 181249261Sdim 182249261Sdimclass AnnotatedLine { 183249261Sdimpublic: 184249261Sdim AnnotatedLine(const UnwrappedLine &Line) 185249261Sdim : First(Line.Tokens.front()), Level(Line.Level), 186249261Sdim InPPDirective(Line.InPPDirective), 187249261Sdim MustBeDeclaration(Line.MustBeDeclaration), 188249261Sdim MightBeFunctionDecl(false) { 189249261Sdim assert(!Line.Tokens.empty()); 190249261Sdim AnnotatedToken *Current = &First; 191249261Sdim for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(), 192249261Sdim E = Line.Tokens.end(); 193249261Sdim I != E; ++I) { 194249261Sdim Current->Children.push_back(AnnotatedToken(*I)); 195249261Sdim Current->Children[0].Parent = Current; 196249261Sdim Current = &Current->Children[0]; 197249261Sdim } 198249261Sdim Last = Current; 199249261Sdim } 200249261Sdim AnnotatedLine(const AnnotatedLine &Other) 201249261Sdim : First(Other.First), Type(Other.Type), Level(Other.Level), 202249261Sdim InPPDirective(Other.InPPDirective), 203249261Sdim MustBeDeclaration(Other.MustBeDeclaration), 204249261Sdim MightBeFunctionDecl(Other.MightBeFunctionDecl) { 205249261Sdim Last = &First; 206249261Sdim while (!Last->Children.empty()) { 207249261Sdim Last->Children[0].Parent = Last; 208249261Sdim Last = &Last->Children[0]; 209249261Sdim } 210249261Sdim } 211249261Sdim 212249261Sdim AnnotatedToken First; 213249261Sdim AnnotatedToken *Last; 214249261Sdim 215249261Sdim LineType Type; 216249261Sdim unsigned Level; 217249261Sdim bool InPPDirective; 218249261Sdim bool MustBeDeclaration; 219249261Sdim bool MightBeFunctionDecl; 220249261Sdim}; 221249261Sdim 222249261Sdiminline prec::Level getPrecedence(const AnnotatedToken &Tok) { 223249261Sdim return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true); 224249261Sdim} 225249261Sdim 226249261Sdim/// \brief Determines extra information about the tokens comprising an 227249261Sdim/// \c UnwrappedLine. 228249261Sdimclass TokenAnnotator { 229249261Sdimpublic: 230249261Sdim TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex, 231249261Sdim IdentifierInfo &Ident_in) 232249261Sdim : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) { 233249261Sdim } 234249261Sdim 235249261Sdim void annotate(AnnotatedLine &Line); 236249261Sdim void calculateFormattingInformation(AnnotatedLine &Line); 237249261Sdim 238249261Sdimprivate: 239249261Sdim /// \brief Calculate the penalty for splitting before \c Tok. 240249261Sdim unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok); 241249261Sdim 242249261Sdim bool spaceRequiredBetween(const AnnotatedLine &Line, 243249261Sdim const AnnotatedToken &Left, 244249261Sdim const AnnotatedToken &Right); 245249261Sdim 246249261Sdim bool spaceRequiredBefore(const AnnotatedLine &Line, 247249261Sdim const AnnotatedToken &Tok); 248249261Sdim 249249261Sdim bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right); 250249261Sdim 251249261Sdim const FormatStyle &Style; 252249261Sdim SourceManager &SourceMgr; 253249261Sdim Lexer &Lex; 254249261Sdim 255249261Sdim // Contextual keywords: 256249261Sdim IdentifierInfo &Ident_in; 257249261Sdim}; 258249261Sdim 259249261Sdim} // end namespace format 260249261Sdim} // end namespace clang 261249261Sdim 262249261Sdim#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H 263