1193326Sed//===--- Token.h - Token interface ------------------------------*- C++ -*-===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9193326Sed// 10193326Sed// This file defines the Token interface. 11193326Sed// 12193326Sed//===----------------------------------------------------------------------===// 13193326Sed 14193326Sed#ifndef LLVM_CLANG_TOKEN_H 15193326Sed#define LLVM_CLANG_TOKEN_H 16193326Sed 17249423Sdim#include "clang/Basic/OperatorKinds.h" 18249423Sdim#include "clang/Basic/SourceLocation.h" 19193326Sed#include "clang/Basic/TemplateKinds.h" 20193326Sed#include "clang/Basic/TokenKinds.h" 21193326Sed#include <cstdlib> 22193326Sed 23193326Sednamespace clang { 24193326Sed 25193326Sedclass IdentifierInfo; 26193326Sed 27193326Sed/// Token - This structure provides full information about a lexed token. 28193326Sed/// It is not intended to be space efficient, it is intended to return as much 29193326Sed/// information as possible about each returned token. This is expected to be 30193326Sed/// compressed into a smaller form if memory footprint is important. 31193326Sed/// 32193326Sed/// The parser can create a special "annotation token" representing a stream of 33193326Sed/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" 34193326Sed/// can be represented by a single typename annotation token that carries 35193326Sed/// information about the SourceRange of the tokens and the type object. 36193326Sedclass Token { 37193326Sed /// The location of the token. 38193326Sed SourceLocation Loc; 39193326Sed 40193326Sed // Conceptually these next two fields could be in a union. However, this 41193326Sed // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical 42193326Sed // routine. Keeping as separate members with casts until a more beautiful fix 43193326Sed // presents itself. 44193326Sed 45193326Sed /// UintData - This holds either the length of the token text, when 46193326Sed /// a normal token, or the end of the SourceRange when an annotation 47193326Sed /// token. 48193326Sed unsigned UintData; 49193326Sed 50193326Sed /// PtrData - This is a union of four different pointer types, which depends 51193326Sed /// on what type of token this is: 52193326Sed /// Identifiers, keywords, etc: 53193326Sed /// This is an IdentifierInfo*, which contains the uniqued identifier 54193326Sed /// spelling. 55193326Sed /// Literals: isLiteral() returns true. 56193326Sed /// This is a pointer to the start of the token in a text buffer, which 57193326Sed /// may be dirty (have trigraphs / escaped newlines). 58193326Sed /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 59193326Sed /// This is a pointer to sema-specific data for the annotation token. 60193326Sed /// Other: 61193326Sed /// This is null. 62193326Sed void *PtrData; 63193326Sed 64193326Sed /// Kind - The actual flavor of token this is. 65193326Sed /// 66224145Sdim unsigned short Kind; 67198092Srdivacky 68193326Sed /// Flags - Bits we track about this token, members of the TokenFlags enum. 69198893Srdivacky unsigned char Flags; 70193326Sedpublic: 71198092Srdivacky 72193326Sed // Various flags set per token: 73193326Sed enum TokenFlags { 74263508Sdim StartOfLine = 0x01, // At start of line or only after whitespace 75263508Sdim // (considering the line after macro expansion). 76263508Sdim LeadingSpace = 0x02, // Whitespace exists before this token (considering 77263508Sdim // whitespace after macro expansion). 78193326Sed DisableExpand = 0x04, // This identifier may never be macro expanded. 79249423Sdim NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. 80234353Sdim LeadingEmptyMacro = 0x10, // Empty macro exists before this token. 81249423Sdim HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. 82263508Sdim HasUCN = 0x40, // This identifier contains a UCN. 83263508Sdim IgnoredComma = 0x80 // This comma is not a macro argument separator (MS). 84193326Sed }; 85193326Sed 86193326Sed tok::TokenKind getKind() const { return (tok::TokenKind)Kind; } 87193326Sed void setKind(tok::TokenKind K) { Kind = K; } 88198092Srdivacky 89193326Sed /// is/isNot - Predicates to check if this token is a specific kind, as in 90193326Sed /// "if (Tok.is(tok::l_brace)) {...}". 91193326Sed bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } 92193326Sed bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } 93193326Sed 94239462Sdim /// \brief Return true if this is a raw identifier (when lexing 95218893Sdim /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). 96218893Sdim bool isAnyIdentifier() const { 97243830Sdim return tok::isAnyIdentifier(getKind()); 98218893Sdim } 99218893Sdim 100243830Sdim /// \brief Return true if this is a "literal", like a numeric 101193326Sed /// constant, string, etc. 102193326Sed bool isLiteral() const { 103243830Sdim return tok::isLiteral(getKind()); 104193326Sed } 105193326Sed 106243830Sdim /// \brief Return true if this is any of tok::annot_* kind tokens. 107198092Srdivacky bool isAnnotation() const { 108243830Sdim return tok::isAnnotation(getKind()); 109193326Sed } 110198092Srdivacky 111239462Sdim /// \brief Return a source location identifier for the specified 112193326Sed /// offset in the current file. 113193326Sed SourceLocation getLocation() const { return Loc; } 114193326Sed unsigned getLength() const { 115193326Sed assert(!isAnnotation() && "Annotation tokens have no length field"); 116193326Sed return UintData; 117193326Sed } 118193326Sed 119193326Sed void setLocation(SourceLocation L) { Loc = L; } 120193326Sed void setLength(unsigned Len) { 121193326Sed assert(!isAnnotation() && "Annotation tokens have no length field"); 122193326Sed UintData = Len; 123193326Sed } 124193326Sed 125193326Sed SourceLocation getAnnotationEndLoc() const { 126193326Sed assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 127193326Sed return SourceLocation::getFromRawEncoding(UintData); 128193326Sed } 129193326Sed void setAnnotationEndLoc(SourceLocation L) { 130193326Sed assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); 131193326Sed UintData = L.getRawEncoding(); 132193326Sed } 133193326Sed 134203955Srdivacky SourceLocation getLastLoc() const { 135203955Srdivacky return isAnnotation() ? getAnnotationEndLoc() : getLocation(); 136203955Srdivacky } 137203955Srdivacky 138239462Sdim /// \brief SourceRange of the group of tokens that this annotation token 139239462Sdim /// represents. 140193326Sed SourceRange getAnnotationRange() const { 141193326Sed return SourceRange(getLocation(), getAnnotationEndLoc()); 142193326Sed } 143193326Sed void setAnnotationRange(SourceRange R) { 144193326Sed setLocation(R.getBegin()); 145193326Sed setAnnotationEndLoc(R.getEnd()); 146193326Sed } 147198092Srdivacky 148193326Sed const char *getName() const { 149193326Sed return tok::getTokenName( (tok::TokenKind) Kind); 150193326Sed } 151198092Srdivacky 152239462Sdim /// \brief Reset all flags to cleared. 153193326Sed void startToken() { 154193326Sed Kind = tok::unknown; 155193326Sed Flags = 0; 156193326Sed PtrData = 0; 157210299Sed UintData = 0; 158193326Sed Loc = SourceLocation(); 159193326Sed } 160198092Srdivacky 161193326Sed IdentifierInfo *getIdentifierInfo() const { 162218893Sdim assert(isNot(tok::raw_identifier) && 163218893Sdim "getIdentifierInfo() on a tok::raw_identifier token!"); 164218893Sdim assert(!isAnnotation() && 165218893Sdim "getIdentifierInfo() on an annotation token!"); 166193326Sed if (isLiteral()) return 0; 167193326Sed return (IdentifierInfo*) PtrData; 168193326Sed } 169193326Sed void setIdentifierInfo(IdentifierInfo *II) { 170193326Sed PtrData = (void*) II; 171193326Sed } 172198092Srdivacky 173218893Sdim /// getRawIdentifierData - For a raw identifier token (i.e., an identifier 174218893Sdim /// lexed in raw mode), returns a pointer to the start of it in the text 175218893Sdim /// buffer if known, null otherwise. 176218893Sdim const char *getRawIdentifierData() const { 177218893Sdim assert(is(tok::raw_identifier)); 178218893Sdim return reinterpret_cast<const char*>(PtrData); 179218893Sdim } 180218893Sdim void setRawIdentifierData(const char *Ptr) { 181218893Sdim assert(is(tok::raw_identifier)); 182218893Sdim PtrData = const_cast<char*>(Ptr); 183218893Sdim } 184218893Sdim 185193326Sed /// getLiteralData - For a literal token (numeric constant, string, etc), this 186193326Sed /// returns a pointer to the start of it in the text buffer if known, null 187193326Sed /// otherwise. 188193326Sed const char *getLiteralData() const { 189193326Sed assert(isLiteral() && "Cannot get literal data of non-literal"); 190193326Sed return reinterpret_cast<const char*>(PtrData); 191193326Sed } 192193326Sed void setLiteralData(const char *Ptr) { 193193326Sed assert(isLiteral() && "Cannot set literal data of non-literal"); 194210299Sed PtrData = const_cast<char*>(Ptr); 195193326Sed } 196198092Srdivacky 197193326Sed void *getAnnotationValue() const { 198193326Sed assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 199193326Sed return PtrData; 200193326Sed } 201193326Sed void setAnnotationValue(void *val) { 202193326Sed assert(isAnnotation() && "Used AnnotVal on non-annotation token"); 203193326Sed PtrData = val; 204193326Sed } 205198092Srdivacky 206239462Sdim /// \brief Set the specified flag. 207193326Sed void setFlag(TokenFlags Flag) { 208193326Sed Flags |= Flag; 209193326Sed } 210198092Srdivacky 211239462Sdim /// \brief Unset the specified flag. 212193326Sed void clearFlag(TokenFlags Flag) { 213193326Sed Flags &= ~Flag; 214193326Sed } 215198092Srdivacky 216239462Sdim /// \brief Return the internal represtation of the flags. 217239462Sdim /// 218239462Sdim /// This is only intended for low-level operations such as writing tokens to 219239462Sdim /// disk. 220193326Sed unsigned getFlags() const { 221193326Sed return Flags; 222193326Sed } 223193326Sed 224239462Sdim /// \brief Set a flag to either true or false. 225193326Sed void setFlagValue(TokenFlags Flag, bool Val) { 226198092Srdivacky if (Val) 227193326Sed setFlag(Flag); 228193326Sed else 229193326Sed clearFlag(Flag); 230193326Sed } 231198092Srdivacky 232193326Sed /// isAtStartOfLine - Return true if this token is at the start of a line. 233193326Sed /// 234193326Sed bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; } 235198092Srdivacky 236239462Sdim /// \brief Return true if this token has whitespace before it. 237193326Sed /// 238193326Sed bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; } 239198092Srdivacky 240239462Sdim /// \brief Return true if this identifier token should never 241193326Sed /// be expanded in the future, due to C99 6.10.3.4p2. 242193326Sed bool isExpandDisabled() const { 243193326Sed return (Flags & DisableExpand) ? true : false; 244193326Sed } 245198092Srdivacky 246239462Sdim /// \brief Return true if we have an ObjC keyword identifier. 247193326Sed bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; 248198092Srdivacky 249239462Sdim /// \brief Return the ObjC keyword kind. 250193326Sed tok::ObjCKeywordKind getObjCKeywordID() const; 251198092Srdivacky 252239462Sdim /// \brief Return true if this token has trigraphs or escaped newlines in it. 253193326Sed bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; } 254218893Sdim 255218893Sdim /// \brief Return true if this token has an empty macro before it. 256218893Sdim /// 257218893Sdim bool hasLeadingEmptyMacro() const { 258218893Sdim return (Flags & LeadingEmptyMacro) ? true : false; 259218893Sdim } 260218893Sdim 261234353Sdim /// \brief Return true if this token is a string or character literal which 262234353Sdim /// has a ud-suffix. 263234353Sdim bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; } 264249423Sdim 265249423Sdim /// Returns true if this token contains a universal character name. 266249423Sdim bool hasUCN() const { return (Flags & HasUCN) ? true : false; } 267193326Sed}; 268193326Sed 269239462Sdim/// \brief Information about the conditional stack (\#if directives) 270193326Sed/// currently active. 271193326Sedstruct PPConditionalInfo { 272239462Sdim /// \brief Location where the conditional started. 273193326Sed SourceLocation IfLoc; 274198092Srdivacky 275239462Sdim /// \brief True if this was contained in a skipping directive, e.g., 276239462Sdim /// in a "\#if 0" block. 277193326Sed bool WasSkipping; 278198092Srdivacky 279239462Sdim /// \brief True if we have emitted tokens already, and now we're in 280239462Sdim /// an \#else block or something. Only useful in Skipping blocks. 281193326Sed bool FoundNonSkip; 282198092Srdivacky 283239462Sdim /// \brief True if we've seen a \#else in this block. If so, 284239462Sdim /// \#elif/\#else directives are not allowed. 285193326Sed bool FoundElse; 286193326Sed}; 287193326Sed 288193326Sed} // end namespace clang 289193326Sed 290210299Sednamespace llvm { 291210299Sed template <> 292210299Sed struct isPodLike<clang::Token> { static const bool value = true; }; 293210299Sed} // end namespace llvm 294210299Sed 295193326Sed#endif 296