1193326Sed//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2193326Sed//
3193326Sed//                     The LLVM Compiler Infrastructure
4193326Sed//
5193326Sed// This file is distributed under the University of Illinois Open Source
6193326Sed// License. See LICENSE.TXT for details.
7193326Sed//
8193326Sed//===----------------------------------------------------------------------===//
9193326Sed//
10193326Sed//  This file defines the Token interface.
11193326Sed//
12193326Sed//===----------------------------------------------------------------------===//
13193326Sed
14193326Sed#ifndef LLVM_CLANG_TOKEN_H
15193326Sed#define LLVM_CLANG_TOKEN_H
16193326Sed
17249423Sdim#include "clang/Basic/OperatorKinds.h"
18249423Sdim#include "clang/Basic/SourceLocation.h"
19193326Sed#include "clang/Basic/TemplateKinds.h"
20193326Sed#include "clang/Basic/TokenKinds.h"
21193326Sed#include <cstdlib>
22193326Sed
23193326Sednamespace clang {
24193326Sed
25193326Sedclass IdentifierInfo;
26193326Sed
27193326Sed/// Token - This structure provides full information about a lexed token.
28193326Sed/// It is not intended to be space efficient, it is intended to return as much
29193326Sed/// information as possible about each returned token.  This is expected to be
30193326Sed/// compressed into a smaller form if memory footprint is important.
31193326Sed///
32193326Sed/// The parser can create a special "annotation token" representing a stream of
33193326Sed/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
34193326Sed/// can be represented by a single typename annotation token that carries
35193326Sed/// information about the SourceRange of the tokens and the type object.
36193326Sedclass Token {
37193326Sed  /// The location of the token.
38193326Sed  SourceLocation Loc;
39193326Sed
40193326Sed  // Conceptually these next two fields could be in a union.  However, this
41193326Sed  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
42193326Sed  // routine. Keeping as separate members with casts until a more beautiful fix
43193326Sed  // presents itself.
44193326Sed
45193326Sed  /// UintData - This holds either the length of the token text, when
46193326Sed  /// a normal token, or the end of the SourceRange when an annotation
47193326Sed  /// token.
48193326Sed  unsigned UintData;
49193326Sed
50193326Sed  /// PtrData - This is a union of four different pointer types, which depends
51193326Sed  /// on what type of token this is:
52193326Sed  ///  Identifiers, keywords, etc:
53193326Sed  ///    This is an IdentifierInfo*, which contains the uniqued identifier
54193326Sed  ///    spelling.
55193326Sed  ///  Literals:  isLiteral() returns true.
56193326Sed  ///    This is a pointer to the start of the token in a text buffer, which
57193326Sed  ///    may be dirty (have trigraphs / escaped newlines).
58193326Sed  ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
59193326Sed  ///    This is a pointer to sema-specific data for the annotation token.
60193326Sed  ///  Other:
61193326Sed  ///    This is null.
62193326Sed  void *PtrData;
63193326Sed
64193326Sed  /// Kind - The actual flavor of token this is.
65193326Sed  ///
66224145Sdim  unsigned short Kind;
67198092Srdivacky
68193326Sed  /// Flags - Bits we track about this token, members of the TokenFlags enum.
69198893Srdivacky  unsigned char Flags;
70193326Sedpublic:
71198092Srdivacky
72193326Sed  // Various flags set per token:
73193326Sed  enum TokenFlags {
74263508Sdim    StartOfLine   = 0x01,  // At start of line or only after whitespace
75263508Sdim                           // (considering the line after macro expansion).
76263508Sdim    LeadingSpace  = 0x02,  // Whitespace exists before this token (considering
77263508Sdim                           // whitespace after macro expansion).
78193326Sed    DisableExpand = 0x04,  // This identifier may never be macro expanded.
79249423Sdim    NeedsCleaning = 0x08,  // Contained an escaped newline or trigraph.
80234353Sdim    LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
81249423Sdim    HasUDSuffix = 0x20,    // This string or character literal has a ud-suffix.
82263508Sdim    HasUCN = 0x40,         // This identifier contains a UCN.
83263508Sdim    IgnoredComma = 0x80    // This comma is not a macro argument separator (MS).
84193326Sed  };
85193326Sed
86193326Sed  tok::TokenKind getKind() const { return (tok::TokenKind)Kind; }
87193326Sed  void setKind(tok::TokenKind K) { Kind = K; }
88198092Srdivacky
89193326Sed  /// is/isNot - Predicates to check if this token is a specific kind, as in
90193326Sed  /// "if (Tok.is(tok::l_brace)) {...}".
91193326Sed  bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
92193326Sed  bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
93193326Sed
94239462Sdim  /// \brief Return true if this is a raw identifier (when lexing
95218893Sdim  /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
96218893Sdim  bool isAnyIdentifier() const {
97243830Sdim    return tok::isAnyIdentifier(getKind());
98218893Sdim  }
99218893Sdim
100243830Sdim  /// \brief Return true if this is a "literal", like a numeric
101193326Sed  /// constant, string, etc.
102193326Sed  bool isLiteral() const {
103243830Sdim    return tok::isLiteral(getKind());
104193326Sed  }
105193326Sed
106243830Sdim  /// \brief Return true if this is any of tok::annot_* kind tokens.
107198092Srdivacky  bool isAnnotation() const {
108243830Sdim    return tok::isAnnotation(getKind());
109193326Sed  }
110198092Srdivacky
111239462Sdim  /// \brief Return a source location identifier for the specified
112193326Sed  /// offset in the current file.
113193326Sed  SourceLocation getLocation() const { return Loc; }
114193326Sed  unsigned getLength() const {
115193326Sed    assert(!isAnnotation() && "Annotation tokens have no length field");
116193326Sed    return UintData;
117193326Sed  }
118193326Sed
119193326Sed  void setLocation(SourceLocation L) { Loc = L; }
120193326Sed  void setLength(unsigned Len) {
121193326Sed    assert(!isAnnotation() && "Annotation tokens have no length field");
122193326Sed    UintData = Len;
123193326Sed  }
124193326Sed
125193326Sed  SourceLocation getAnnotationEndLoc() const {
126193326Sed    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
127193326Sed    return SourceLocation::getFromRawEncoding(UintData);
128193326Sed  }
129193326Sed  void setAnnotationEndLoc(SourceLocation L) {
130193326Sed    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
131193326Sed    UintData = L.getRawEncoding();
132193326Sed  }
133193326Sed
134203955Srdivacky  SourceLocation getLastLoc() const {
135203955Srdivacky    return isAnnotation() ? getAnnotationEndLoc() : getLocation();
136203955Srdivacky  }
137203955Srdivacky
138239462Sdim  /// \brief SourceRange of the group of tokens that this annotation token
139239462Sdim  /// represents.
140193326Sed  SourceRange getAnnotationRange() const {
141193326Sed    return SourceRange(getLocation(), getAnnotationEndLoc());
142193326Sed  }
143193326Sed  void setAnnotationRange(SourceRange R) {
144193326Sed    setLocation(R.getBegin());
145193326Sed    setAnnotationEndLoc(R.getEnd());
146193326Sed  }
147198092Srdivacky
148193326Sed  const char *getName() const {
149193326Sed    return tok::getTokenName( (tok::TokenKind) Kind);
150193326Sed  }
151198092Srdivacky
152239462Sdim  /// \brief Reset all flags to cleared.
153193326Sed  void startToken() {
154193326Sed    Kind = tok::unknown;
155193326Sed    Flags = 0;
156193326Sed    PtrData = 0;
157210299Sed    UintData = 0;
158193326Sed    Loc = SourceLocation();
159193326Sed  }
160198092Srdivacky
161193326Sed  IdentifierInfo *getIdentifierInfo() const {
162218893Sdim    assert(isNot(tok::raw_identifier) &&
163218893Sdim           "getIdentifierInfo() on a tok::raw_identifier token!");
164218893Sdim    assert(!isAnnotation() &&
165218893Sdim           "getIdentifierInfo() on an annotation token!");
166193326Sed    if (isLiteral()) return 0;
167193326Sed    return (IdentifierInfo*) PtrData;
168193326Sed  }
169193326Sed  void setIdentifierInfo(IdentifierInfo *II) {
170193326Sed    PtrData = (void*) II;
171193326Sed  }
172198092Srdivacky
173218893Sdim  /// getRawIdentifierData - For a raw identifier token (i.e., an identifier
174218893Sdim  /// lexed in raw mode), returns a pointer to the start of it in the text
175218893Sdim  /// buffer if known, null otherwise.
176218893Sdim  const char *getRawIdentifierData() const {
177218893Sdim    assert(is(tok::raw_identifier));
178218893Sdim    return reinterpret_cast<const char*>(PtrData);
179218893Sdim  }
180218893Sdim  void setRawIdentifierData(const char *Ptr) {
181218893Sdim    assert(is(tok::raw_identifier));
182218893Sdim    PtrData = const_cast<char*>(Ptr);
183218893Sdim  }
184218893Sdim
185193326Sed  /// getLiteralData - For a literal token (numeric constant, string, etc), this
186193326Sed  /// returns a pointer to the start of it in the text buffer if known, null
187193326Sed  /// otherwise.
188193326Sed  const char *getLiteralData() const {
189193326Sed    assert(isLiteral() && "Cannot get literal data of non-literal");
190193326Sed    return reinterpret_cast<const char*>(PtrData);
191193326Sed  }
192193326Sed  void setLiteralData(const char *Ptr) {
193193326Sed    assert(isLiteral() && "Cannot set literal data of non-literal");
194210299Sed    PtrData = const_cast<char*>(Ptr);
195193326Sed  }
196198092Srdivacky
197193326Sed  void *getAnnotationValue() const {
198193326Sed    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
199193326Sed    return PtrData;
200193326Sed  }
201193326Sed  void setAnnotationValue(void *val) {
202193326Sed    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
203193326Sed    PtrData = val;
204193326Sed  }
205198092Srdivacky
206239462Sdim  /// \brief Set the specified flag.
207193326Sed  void setFlag(TokenFlags Flag) {
208193326Sed    Flags |= Flag;
209193326Sed  }
210198092Srdivacky
211239462Sdim  /// \brief Unset the specified flag.
212193326Sed  void clearFlag(TokenFlags Flag) {
213193326Sed    Flags &= ~Flag;
214193326Sed  }
215198092Srdivacky
216239462Sdim  /// \brief Return the internal represtation of the flags.
217239462Sdim  ///
218239462Sdim  /// This is only intended for low-level operations such as writing tokens to
219239462Sdim  /// disk.
220193326Sed  unsigned getFlags() const {
221193326Sed    return Flags;
222193326Sed  }
223193326Sed
224239462Sdim  /// \brief Set a flag to either true or false.
225193326Sed  void setFlagValue(TokenFlags Flag, bool Val) {
226198092Srdivacky    if (Val)
227193326Sed      setFlag(Flag);
228193326Sed    else
229193326Sed      clearFlag(Flag);
230193326Sed  }
231198092Srdivacky
232193326Sed  /// isAtStartOfLine - Return true if this token is at the start of a line.
233193326Sed  ///
234193326Sed  bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; }
235198092Srdivacky
236239462Sdim  /// \brief Return true if this token has whitespace before it.
237193326Sed  ///
238193326Sed  bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; }
239198092Srdivacky
240239462Sdim  /// \brief Return true if this identifier token should never
241193326Sed  /// be expanded in the future, due to C99 6.10.3.4p2.
242193326Sed  bool isExpandDisabled() const {
243193326Sed    return (Flags & DisableExpand) ? true : false;
244193326Sed  }
245198092Srdivacky
246239462Sdim  /// \brief Return true if we have an ObjC keyword identifier.
247193326Sed  bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
248198092Srdivacky
249239462Sdim  /// \brief Return the ObjC keyword kind.
250193326Sed  tok::ObjCKeywordKind getObjCKeywordID() const;
251198092Srdivacky
252239462Sdim  /// \brief Return true if this token has trigraphs or escaped newlines in it.
253193326Sed  bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
254218893Sdim
255218893Sdim  /// \brief Return true if this token has an empty macro before it.
256218893Sdim  ///
257218893Sdim  bool hasLeadingEmptyMacro() const {
258218893Sdim    return (Flags & LeadingEmptyMacro) ? true : false;
259218893Sdim  }
260218893Sdim
261234353Sdim  /// \brief Return true if this token is a string or character literal which
262234353Sdim  /// has a ud-suffix.
263234353Sdim  bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; }
264249423Sdim
265249423Sdim  /// Returns true if this token contains a universal character name.
266249423Sdim  bool hasUCN() const { return (Flags & HasUCN) ? true : false; }
267193326Sed};
268193326Sed
269239462Sdim/// \brief Information about the conditional stack (\#if directives)
270193326Sed/// currently active.
271193326Sedstruct PPConditionalInfo {
272239462Sdim  /// \brief Location where the conditional started.
273193326Sed  SourceLocation IfLoc;
274198092Srdivacky
275239462Sdim  /// \brief True if this was contained in a skipping directive, e.g.,
276239462Sdim  /// in a "\#if 0" block.
277193326Sed  bool WasSkipping;
278198092Srdivacky
279239462Sdim  /// \brief True if we have emitted tokens already, and now we're in
280239462Sdim  /// an \#else block or something.  Only useful in Skipping blocks.
281193326Sed  bool FoundNonSkip;
282198092Srdivacky
283239462Sdim  /// \brief True if we've seen a \#else in this block.  If so,
284239462Sdim  /// \#elif/\#else directives are not allowed.
285193326Sed  bool FoundElse;
286193326Sed};
287193326Sed
288193326Sed}  // end namespace clang
289193326Sed
290210299Sednamespace llvm {
291210299Sed  template <>
292210299Sed  struct isPodLike<clang::Token> { static const bool value = true; };
293210299Sed}  // end namespace llvm
294210299Sed
295193326Sed#endif
296