IdentifierTable.h revision 249423
1193326Sed//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2193326Sed//
3193326Sed//                     The LLVM Compiler Infrastructure
4193326Sed//
5193326Sed// This file is distributed under the University of Illinois Open Source
6193326Sed// License. See LICENSE.TXT for details.
7193326Sed//
8193326Sed//===----------------------------------------------------------------------===//
9239462Sdim///
10239462Sdim/// \file
11239462Sdim/// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12239462Sdim/// clang::Selector interfaces.
13239462Sdim///
14193326Sed//===----------------------------------------------------------------------===//
15193326Sed
16193326Sed#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17193326Sed#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18193326Sed
19249423Sdim#include "clang/Basic/LLVM.h"
20193326Sed#include "clang/Basic/OperatorKinds.h"
21193326Sed#include "clang/Basic/TokenKinds.h"
22193326Sed#include "llvm/ADT/StringMap.h"
23205219Srdivacky#include "llvm/ADT/StringRef.h"
24193326Sed#include "llvm/Support/PointerLikeTypeTraits.h"
25218893Sdim#include <cassert>
26198092Srdivacky#include <string>
27193326Sed
// Forward declaration of the llvm::DenseMapInfo class template; only the
// declaration (not the definition) is provided here.
namespace llvm {
  template <typename T> struct DenseMapInfo;
}
31193326Sed
32193326Sednamespace clang {
// Forward declarations of the classes named in this header.
class LangOptions;
class IdentifierInfo;
class IdentifierTable;
class SourceLocation;
class MultiKeywordSelector; // private class used by Selector
class DeclarationName;      // AST class that stores declaration names

/// \brief A simple pair of identifier info and location.
typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
42198092Srdivacky
43198092Srdivacky
44239462Sdim/// One of these records is kept for each identifier that
45239462Sdim/// is lexed.  This contains information about whether the token was \#define'd,
46193326Sed/// is a language keyword, or if it is a front-end token of some sort (e.g. a
47193326Sed/// variable or function name).  The preprocessor keeps this information in a
48198092Srdivacky/// set, and all tok::identifier tokens have a pointer to one of these.
49193326Sedclass IdentifierInfo {
50226633Sdim  unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
51193326Sed  // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
52193326Sed  // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
53193326Sed  // are for builtins.
54218893Sdim  unsigned ObjCOrBuiltinID    :11;
55193326Sed  bool HasMacro               : 1; // True if there is a #define for this.
56243830Sdim  bool HadMacro               : 1; // True if there was a #define for this.
57193326Sed  bool IsExtension            : 1; // True if identifier is a lang extension.
58226633Sdim  bool IsCXX11CompatKeyword   : 1; // True if identifier is a keyword in C++11.
59193326Sed  bool IsPoisoned             : 1; // True if identifier is poisoned.
60193326Sed  bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
61193326Sed  bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
62234353Sdim  bool IsFromAST              : 1; // True if identifier was loaded (at least
63234353Sdim                                   // partially) from an AST file.
64234353Sdim  bool ChangedAfterLoad       : 1; // True if identifier has changed from the
65234353Sdim                                   // definition loaded from an AST file.
66212904Sdim  bool RevertedTokenID        : 1; // True if RevertTokenIDToIdentifier was
67212904Sdim                                   // called.
68234353Sdim  bool OutOfDate              : 1; // True if there may be additional
69234353Sdim                                   // information about this identifier
70234353Sdim                                   // stored externally.
71239462Sdim  bool IsModulesImport        : 1; // True if this is the 'import' contextual
72234353Sdim                                   // keyword.
73243830Sdim  // 32-bit word is filled.
74243830Sdim
75193326Sed  void *FETokenInfo;               // Managed by the language front-end.
76193326Sed  llvm::StringMapEntry<IdentifierInfo*> *Entry;
77198092Srdivacky
78243830Sdim  IdentifierInfo(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
79243830Sdim  void operator=(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
80193326Sed
81198092Srdivacky  friend class IdentifierTable;
82218893Sdim
83193326Sedpublic:
84193326Sed  IdentifierInfo();
85193326Sed
86198092Srdivacky
87239462Sdim  /// \brief Return true if this is the identifier for the specified string.
88239462Sdim  ///
89193326Sed  /// This is intended to be used for string literals only: II->isStr("foo").
90193326Sed  template <std::size_t StrLen>
91193326Sed  bool isStr(const char (&Str)[StrLen]) const {
92198398Srdivacky    return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1);
93193326Sed  }
94198092Srdivacky
95239462Sdim  /// \brief Return the beginning of the actual null-terminated string for this
96239462Sdim  /// identifier.
97193326Sed  ///
98198398Srdivacky  const char *getNameStart() const {
99193326Sed    if (Entry) return Entry->getKeyData();
100193326Sed    // FIXME: This is gross. It would be best not to embed specific details
101193326Sed    // of the PTH file format here.
102198092Srdivacky    // The 'this' pointer really points to a
103193326Sed    // std::pair<IdentifierInfo, const char*>, where internal pointer
104193326Sed    // points to the external string data.
105210299Sed    typedef std::pair<IdentifierInfo, const char*> actualtype;
106210299Sed    return ((const actualtype*) this)->second;
107193326Sed  }
108198092Srdivacky
109239462Sdim  /// \brief Efficiently return the length of this identifier info.
110193326Sed  ///
111193326Sed  unsigned getLength() const {
112193326Sed    if (Entry) return Entry->getKeyLength();
113193326Sed    // FIXME: This is gross. It would be best not to embed specific details
114193326Sed    // of the PTH file format here.
115198092Srdivacky    // The 'this' pointer really points to a
116193326Sed    // std::pair<IdentifierInfo, const char*>, where internal pointer
117193326Sed    // points to the external string data.
118210299Sed    typedef std::pair<IdentifierInfo, const char*> actualtype;
119210299Sed    const char* p = ((const actualtype*) this)->second - 2;
120198398Srdivacky    return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
121193326Sed  }
122198092Srdivacky
123239462Sdim  /// \brief Return the actual identifier string.
124226633Sdim  StringRef getName() const {
125226633Sdim    return StringRef(getNameStart(), getLength());
126198398Srdivacky  }
127198398Srdivacky
128239462Sdim  /// \brief Return true if this identifier is \#defined to some other value.
129193326Sed  bool hasMacroDefinition() const {
130193326Sed    return HasMacro;
131193326Sed  }
132193326Sed  void setHasMacroDefinition(bool Val) {
133193326Sed    if (HasMacro == Val) return;
134198092Srdivacky
135193326Sed    HasMacro = Val;
136243830Sdim    if (Val) {
137193326Sed      NeedsHandleIdentifier = 1;
138243830Sdim      HadMacro = true;
139243830Sdim    } else {
140193326Sed      RecomputeNeedsHandleIdentifier();
141243830Sdim    }
142193326Sed  }
143243830Sdim  /// \brief Returns true if this identifier was \#defined to some value at any
144243830Sdim  /// moment. In this case there should be an entry for the identifier in the
145243830Sdim  /// macro history table in Preprocessor.
146243830Sdim  bool hadMacroDefinition() const {
147243830Sdim    return HadMacro;
148243830Sdim  }
149198092Srdivacky
150212904Sdim  /// getTokenID - If this is a source-language token (e.g. 'for'), this API
151193326Sed  /// can be used to cause the lexer to map identifiers to source-language
152193326Sed  /// tokens.
153193326Sed  tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
154198092Srdivacky
155212904Sdim  /// \brief True if RevertTokenIDToIdentifier() was called.
156212904Sdim  bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
157212904Sdim
158212904Sdim  /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
159212904Sdim  /// compatibility.
160212904Sdim  ///
161212904Sdim  /// TokenID is normally read-only but there are 2 instances where we revert it
162212904Sdim  /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
163212904Sdim  /// using this method so we can inform serialization about it.
164212904Sdim  void RevertTokenIDToIdentifier() {
165212904Sdim    assert(TokenID != tok::identifier && "Already at tok::identifier");
166212904Sdim    TokenID = tok::identifier;
167212904Sdim    RevertedTokenID = true;
168212904Sdim  }
169212904Sdim
170239462Sdim  /// \brief Return the preprocessor keyword ID for this identifier.
171239462Sdim  ///
172193326Sed  /// For example, "define" will return tok::pp_define.
173193326Sed  tok::PPKeywordKind getPPKeywordID() const;
174198092Srdivacky
175239462Sdim  /// \brief Return the Objective-C keyword ID for the this identifier.
176239462Sdim  ///
177239462Sdim  /// For example, 'class' will return tok::objc_class if ObjC is enabled.
178193326Sed  tok::ObjCKeywordKind getObjCKeywordID() const {
179198092Srdivacky    if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
180193326Sed      return tok::ObjCKeywordKind(ObjCOrBuiltinID);
181193326Sed    else
182193326Sed      return tok::objc_not_keyword;
183193326Sed  }
184193326Sed  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
185193326Sed
186193326Sed  /// getBuiltinID - Return a value indicating whether this is a builtin
187193326Sed  /// function.  0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
188193326Sed  /// 2+ are specific builtin functions.
189198092Srdivacky  unsigned getBuiltinID() const {
190193326Sed    if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
191198092Srdivacky      return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
192193326Sed    else
193193326Sed      return 0;
194193326Sed  }
195193326Sed  void setBuiltinID(unsigned ID) {
196193326Sed    ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
197198092Srdivacky    assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
198193326Sed           && "ID too large for field!");
199193326Sed  }
200193326Sed
201193326Sed  unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
202193326Sed  void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
203193326Sed
204193326Sed  /// get/setExtension - Initialize information about whether or not this
205193326Sed  /// language token is an extension.  This controls extension warnings, and is
206193326Sed  /// only valid if a custom token ID is set.
207193326Sed  bool isExtensionToken() const { return IsExtension; }
208193326Sed  void setIsExtensionToken(bool Val) {
209193326Sed    IsExtension = Val;
210193326Sed    if (Val)
211193326Sed      NeedsHandleIdentifier = 1;
212193326Sed    else
213193326Sed      RecomputeNeedsHandleIdentifier();
214193326Sed  }
215198092Srdivacky
216226633Sdim  /// is/setIsCXX11CompatKeyword - Initialize information about whether or not
217226633Sdim  /// this language token is a keyword in C++11. This controls compatibility
218226633Sdim  /// warnings, and is only true when not parsing C++11. Once a compatibility
219226633Sdim  /// problem has been diagnosed with this keyword, the flag will be cleared.
220226633Sdim  bool isCXX11CompatKeyword() const { return IsCXX11CompatKeyword; }
221226633Sdim  void setIsCXX11CompatKeyword(bool Val) {
222226633Sdim    IsCXX11CompatKeyword = Val;
223226633Sdim    if (Val)
224226633Sdim      NeedsHandleIdentifier = 1;
225226633Sdim    else
226226633Sdim      RecomputeNeedsHandleIdentifier();
227226633Sdim  }
228226633Sdim
229193326Sed  /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
230193326Sed  /// Preprocessor will emit an error every time this token is used.
231193326Sed  void setIsPoisoned(bool Value = true) {
232193326Sed    IsPoisoned = Value;
233193326Sed    if (Value)
234193326Sed      NeedsHandleIdentifier = 1;
235193326Sed    else
236193326Sed      RecomputeNeedsHandleIdentifier();
237193326Sed  }
238198092Srdivacky
239193326Sed  /// isPoisoned - Return true if this token has been poisoned.
240193326Sed  bool isPoisoned() const { return IsPoisoned; }
241198092Srdivacky
242193326Sed  /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
243193326Sed  /// this identifier is a C++ alternate representation of an operator.
244193326Sed  void setIsCPlusPlusOperatorKeyword(bool Val = true) {
245193326Sed    IsCPPOperatorKeyword = Val;
246193326Sed    if (Val)
247193326Sed      NeedsHandleIdentifier = 1;
248193326Sed    else
249193326Sed      RecomputeNeedsHandleIdentifier();
250193326Sed  }
251193326Sed  bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
252193326Sed
253193326Sed  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
254193326Sed  /// associate arbitrary metadata with this token.
255193326Sed  template<typename T>
256193326Sed  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
257193326Sed  void setFETokenInfo(void *T) { FETokenInfo = T; }
258193326Sed
259193326Sed  /// isHandleIdentifierCase - Return true if the Preprocessor::HandleIdentifier
260193326Sed  /// must be called on a token of this identifier.  If this returns false, we
261193326Sed  /// know that HandleIdentifier will not affect the token.
262193326Sed  bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
263198092Srdivacky
264212904Sdim  /// isFromAST - Return true if the identifier in its current state was loaded
265212904Sdim  /// from an AST file.
266212904Sdim  bool isFromAST() const { return IsFromAST; }
267212904Sdim
268234353Sdim  void setIsFromAST() { IsFromAST = true; }
269212904Sdim
270234353Sdim  /// \brief Determine whether this identifier has changed since it was loaded
271234353Sdim  /// from an AST file.
272234353Sdim  bool hasChangedSinceDeserialization() const {
273234353Sdim    return ChangedAfterLoad;
274234353Sdim  }
275234353Sdim
276234353Sdim  /// \brief Note that this identifier has changed since it was loaded from
277234353Sdim  /// an AST file.
278234353Sdim  void setChangedSinceDeserialization() {
279234353Sdim    ChangedAfterLoad = true;
280234353Sdim  }
281234353Sdim
282234353Sdim  /// \brief Determine whether the information for this identifier is out of
283234353Sdim  /// date with respect to the external source.
284234353Sdim  bool isOutOfDate() const { return OutOfDate; }
285234353Sdim
286234353Sdim  /// \brief Set whether the information for this identifier is out of
287234353Sdim  /// date with respect to the external source.
288234353Sdim  void setOutOfDate(bool OOD) {
289234353Sdim    OutOfDate = OOD;
290234353Sdim    if (OOD)
291234353Sdim      NeedsHandleIdentifier = true;
292234353Sdim    else
293234353Sdim      RecomputeNeedsHandleIdentifier();
294234353Sdim  }
295234353Sdim
296234353Sdim  /// \brief Determine whether this is the contextual keyword
297249423Sdim  /// 'import'.
298234353Sdim  bool isModulesImport() const { return IsModulesImport; }
299234353Sdim
300234353Sdim  /// \brief Set whether this identifier is the contextual keyword
301249423Sdim  /// 'import'.
302234353Sdim  void setModulesImport(bool I) {
303234353Sdim    IsModulesImport = I;
304234353Sdim    if (I)
305234353Sdim      NeedsHandleIdentifier = true;
306234353Sdim    else
307234353Sdim      RecomputeNeedsHandleIdentifier();
308234353Sdim  }
309234353Sdim
310193326Sedprivate:
311193326Sed  /// RecomputeNeedsHandleIdentifier - The Preprocessor::HandleIdentifier does
312193326Sed  /// several special (but rare) things to identifiers of various sorts.  For
313193326Sed  /// example, it changes the "for" keyword token from tok::identifier to
314193326Sed  /// tok::for.
315193326Sed  ///
316193326Sed  /// This method is very tied to the definition of HandleIdentifier.  Any
317193326Sed  /// change to it should be reflected here.
318193326Sed  void RecomputeNeedsHandleIdentifier() {
319193326Sed    NeedsHandleIdentifier =
320193326Sed      (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
321234353Sdim       isExtensionToken() | isCXX11CompatKeyword() || isOutOfDate() ||
322234353Sdim       isModulesImport());
323193326Sed  }
324193326Sed};
325193326Sed
326221345Sdim/// \brief an RAII object for [un]poisoning an identifier
327221345Sdim/// within a certain scope. II is allowed to be null, in
328221345Sdim/// which case, objects of this type have no effect.
329221345Sdimclass PoisonIdentifierRAIIObject {
330221345Sdim  IdentifierInfo *const II;
331221345Sdim  const bool OldValue;
332221345Sdimpublic:
333221345Sdim  PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
334221345Sdim    : II(II), OldValue(II ? II->isPoisoned() : false) {
335221345Sdim    if(II)
336221345Sdim      II->setIsPoisoned(NewValue);
337221345Sdim  }
338221345Sdim
339221345Sdim  ~PoisonIdentifierRAIIObject() {
340221345Sdim    if(II)
341221345Sdim      II->setIsPoisoned(OldValue);
342221345Sdim  }
343221345Sdim};
344221345Sdim
345218893Sdim/// \brief An iterator that walks over all of the known identifiers
346218893Sdim/// in the lookup table.
347218893Sdim///
348218893Sdim/// Since this iterator uses an abstract interface via virtual
349218893Sdim/// functions, it uses an object-oriented interface rather than the
350218893Sdim/// more standard C++ STL iterator interface. In this OO-style
351218893Sdim/// iteration, the single function \c Next() provides dereference,
352218893Sdim/// advance, and end-of-sequence checking in a single
353218893Sdim/// operation. Subclasses of this iterator type will provide the
354218893Sdim/// actual functionality.
355218893Sdimclass IdentifierIterator {
356218893Sdimprivate:
357243830Sdim  IdentifierIterator(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
358243830Sdim  void operator=(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
359218893Sdim
360218893Sdimprotected:
361218893Sdim  IdentifierIterator() { }
362218893Sdim
363218893Sdimpublic:
364218893Sdim  virtual ~IdentifierIterator();
365218893Sdim
366218893Sdim  /// \brief Retrieve the next string in the identifier table and
367218893Sdim  /// advances the iterator for the following string.
368218893Sdim  ///
369218893Sdim  /// \returns The next string in the identifier table. If there is
370226633Sdim  /// no such string, returns an empty \c StringRef.
371226633Sdim  virtual StringRef Next() = 0;
372218893Sdim};
373218893Sdim
374193326Sed/// IdentifierInfoLookup - An abstract class used by IdentifierTable that
375193326Sed///  provides an interface for performing lookups from strings
376193326Sed/// (const char *) to IdentiferInfo objects.
377193326Sedclass IdentifierInfoLookup {
378193326Sedpublic:
379193326Sed  virtual ~IdentifierInfoLookup();
380198092Srdivacky
381193326Sed  /// get - Return the identifier token info for the specified named identifier.
382193326Sed  ///  Unlike the version in IdentifierTable, this returns a pointer instead
383193326Sed  ///  of a reference.  If the pointer is NULL then the IdentifierInfo cannot
384193326Sed  ///  be found.
385226633Sdim  virtual IdentifierInfo* get(StringRef Name) = 0;
386218893Sdim
387218893Sdim  /// \brief Retrieve an iterator into the set of all identifiers
388218893Sdim  /// known to this identifier lookup source.
389218893Sdim  ///
390218893Sdim  /// This routine provides access to all of the identifiers known to
391218893Sdim  /// the identifier lookup, allowing access to the contents of the
392218893Sdim  /// identifiers without introducing the overhead of constructing
393218893Sdim  /// IdentifierInfo objects for each.
394218893Sdim  ///
395218893Sdim  /// \returns A new iterator into the set of known identifiers. The
396218893Sdim  /// caller is responsible for deleting this iterator.
397218893Sdim  virtual IdentifierIterator *getIdentifiers() const;
398198092Srdivacky};
399193326Sed
400193326Sed/// \brief An abstract class used to resolve numerical identifier
401193326Sed/// references (meaningful only to some external source) into
402193326Sed/// IdentifierInfo pointers.
403193326Sedclass ExternalIdentifierLookup {
404193326Sedpublic:
405193326Sed  virtual ~ExternalIdentifierLookup();
406193326Sed
407193326Sed  /// \brief Return the identifier associated with the given ID number.
408193326Sed  ///
409193326Sed  /// The ID 0 is associated with the NULL identifier.
410193326Sed  virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
411193326Sed};
412193326Sed
413239462Sdim/// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
414239462Sdim///
415239462Sdim/// This has no other purpose, but this is an extremely performance-critical
416239462Sdim/// piece of the code, as each occurrence of every identifier goes through
417239462Sdim/// here when lexed.
418193326Sedclass IdentifierTable {
419193326Sed  // Shark shows that using MallocAllocator is *much* slower than using this
420193326Sed  // BumpPtrAllocator!
421193326Sed  typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
422193326Sed  HashTableTy HashTable;
423198092Srdivacky
424193326Sed  IdentifierInfoLookup* ExternalLookup;
425193326Sed
426193326Sedpublic:
427239462Sdim  /// \brief Create the identifier table, populating it with info about the
428239462Sdim  /// language keywords for the language specified by \p LangOpts.
429193326Sed  IdentifierTable(const LangOptions &LangOpts,
430193326Sed                  IdentifierInfoLookup* externalLookup = 0);
431198092Srdivacky
432193326Sed  /// \brief Set the external identifier lookup mechanism.
433193326Sed  void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
434193326Sed    ExternalLookup = IILookup;
435193326Sed  }
436193326Sed
437218893Sdim  /// \brief Retrieve the external identifier lookup object, if any.
438218893Sdim  IdentifierInfoLookup *getExternalIdentifierLookup() const {
439218893Sdim    return ExternalLookup;
440218893Sdim  }
441218893Sdim
442193326Sed  llvm::BumpPtrAllocator& getAllocator() {
443193326Sed    return HashTable.getAllocator();
444193326Sed  }
445198092Srdivacky
446239462Sdim  /// \brief Return the identifier token info for the specified named
447239462Sdim  /// identifier.
448226633Sdim  IdentifierInfo &get(StringRef Name) {
449193326Sed    llvm::StringMapEntry<IdentifierInfo*> &Entry =
450205219Srdivacky      HashTable.GetOrCreateValue(Name);
451198092Srdivacky
452193326Sed    IdentifierInfo *II = Entry.getValue();
453193326Sed    if (II) return *II;
454198092Srdivacky
455193326Sed    // No entry; if we have an external lookup, look there first.
456193326Sed    if (ExternalLookup) {
457205219Srdivacky      II = ExternalLookup->get(Name);
458193326Sed      if (II) {
459193326Sed        // Cache in the StringMap for subsequent lookups.
460193326Sed        Entry.setValue(II);
461193326Sed        return *II;
462193326Sed      }
463193326Sed    }
464193326Sed
465193326Sed    // Lookups failed, make a new IdentifierInfo.
466193326Sed    void *Mem = getAllocator().Allocate<IdentifierInfo>();
467193326Sed    II = new (Mem) IdentifierInfo();
468193326Sed    Entry.setValue(II);
469193326Sed
470193326Sed    // Make sure getName() knows how to find the IdentifierInfo
471193326Sed    // contents.
472193326Sed    II->Entry = &Entry;
473193326Sed
474193326Sed    return *II;
475193326Sed  }
476198092Srdivacky
477226633Sdim  IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
478212904Sdim    IdentifierInfo &II = get(Name);
479212904Sdim    II.TokenID = TokenCode;
480226633Sdim    assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
481212904Sdim    return II;
482212904Sdim  }
483212904Sdim
484212904Sdim  /// \brief Gets an IdentifierInfo for the given name without consulting
485212904Sdim  ///        external sources.
486193326Sed  ///
487212904Sdim  /// This is a version of get() meant for external sources that want to
488212904Sdim  /// introduce or modify an identifier. If they called get(), they would
489212904Sdim  /// likely end up in a recursion.
490226633Sdim  IdentifierInfo &getOwn(StringRef Name) {
491193326Sed    llvm::StringMapEntry<IdentifierInfo*> &Entry =
492224145Sdim      HashTable.GetOrCreateValue(Name);
493198092Srdivacky
494193326Sed    IdentifierInfo *II = Entry.getValue();
495212904Sdim    if (!II) {
496198092Srdivacky
497212904Sdim      // Lookups failed, make a new IdentifierInfo.
498212904Sdim      void *Mem = getAllocator().Allocate<IdentifierInfo>();
499212904Sdim      II = new (Mem) IdentifierInfo();
500212904Sdim      Entry.setValue(II);
501193326Sed
502212904Sdim      // Make sure getName() knows how to find the IdentifierInfo
503212904Sdim      // contents.
504212904Sdim      II->Entry = &Entry;
505234353Sdim
506234353Sdim      // If this is the 'import' contextual keyword, mark it as such.
507234353Sdim      if (Name.equals("import"))
508234353Sdim        II->setModulesImport(true);
509212904Sdim    }
510193326Sed
511193326Sed    return *II;
512193326Sed  }
513193326Sed
514193326Sed  typedef HashTableTy::const_iterator iterator;
515193326Sed  typedef HashTableTy::const_iterator const_iterator;
516198092Srdivacky
517193326Sed  iterator begin() const { return HashTable.begin(); }
518193326Sed  iterator end() const   { return HashTable.end(); }
519193326Sed  unsigned size() const { return HashTable.size(); }
520198092Srdivacky
521239462Sdim  /// \brief Print some statistics to stderr that indicate how well the
522193326Sed  /// hashing is doing.
523193326Sed  void PrintStats() const;
524198092Srdivacky
525193326Sed  void AddKeywords(const LangOptions &LangOpts);
526193326Sed};
527193326Sed
/// \brief A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// \brief No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,

  // performSelector families
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// \brief An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern of ObjCMethodFamilyBitWidth bits.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
579221345Sdim
580239462Sdim/// \brief Smart pointer class that efficiently represents Objective-C method
581239462Sdim/// names.
582239462Sdim///
583239462Sdim/// This class will either point to an IdentifierInfo or a
584193326Sed/// MultiKeywordSelector (which is private). This enables us to optimize
585198092Srdivacky/// selectors that take no arguments and selectors that take 1 argument, which
586193326Sed/// accounts for 78% of all selectors in Cocoa.h.
587193326Sedclass Selector {
588226633Sdim  friend class Diagnostic;
589198092Srdivacky
590193326Sed  enum IdentifierInfoFlag {
591239462Sdim    // Empty selector = 0.
592193326Sed    ZeroArg  = 0x1,
593193326Sed    OneArg   = 0x2,
594239462Sdim    MultiArg = 0x3,
595193326Sed    ArgFlags = ZeroArg|OneArg
596193326Sed  };
597193326Sed  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
598198092Srdivacky
599193326Sed  Selector(IdentifierInfo *II, unsigned nArgs) {
600193326Sed    InfoPtr = reinterpret_cast<uintptr_t>(II);
601193326Sed    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
602193326Sed    assert(nArgs < 2 && "nArgs not equal to 0/1");
603193326Sed    InfoPtr |= nArgs+1;
604193326Sed  }
605193326Sed  Selector(MultiKeywordSelector *SI) {
606193326Sed    InfoPtr = reinterpret_cast<uintptr_t>(SI);
607193326Sed    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
608239462Sdim    InfoPtr |= MultiArg;
609193326Sed  }
610198092Srdivacky
611193326Sed  IdentifierInfo *getAsIdentifierInfo() const {
612239462Sdim    if (getIdentifierInfoFlag() < MultiArg)
613193326Sed      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
614193326Sed    return 0;
615193326Sed  }
616239462Sdim  MultiKeywordSelector *getMultiKeywordSelector() const {
617239462Sdim    return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
618239462Sdim  }
619239462Sdim
620193326Sed  unsigned getIdentifierInfoFlag() const {
621193326Sed    return InfoPtr & ArgFlags;
622193326Sed  }
623193326Sed
624221345Sdim  static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
625221345Sdim
626193326Sedpublic:
627193326Sed  friend class SelectorTable; // only the SelectorTable can create these
628193326Sed  friend class DeclarationName; // and the AST's DeclarationName.
629193326Sed
630193326Sed  /// The default ctor should only be used when creating data structures that
631193326Sed  ///  will contain selectors.
632193326Sed  Selector() : InfoPtr(0) {}
633193326Sed  Selector(uintptr_t V) : InfoPtr(V) {}
634193326Sed
635193326Sed  /// operator==/!= - Indicate whether the specified selectors are identical.
636193326Sed  bool operator==(Selector RHS) const {
637193326Sed    return InfoPtr == RHS.InfoPtr;
638193326Sed  }
639193326Sed  bool operator!=(Selector RHS) const {
640193326Sed    return InfoPtr != RHS.InfoPtr;
641193326Sed  }
642193326Sed  void *getAsOpaquePtr() const {
643193326Sed    return reinterpret_cast<void*>(InfoPtr);
644193326Sed  }
645193326Sed
646193326Sed  /// \brief Determine whether this is the empty selector.
647193326Sed  bool isNull() const { return InfoPtr == 0; }
648193326Sed
649193326Sed  // Predicates to identify the selector type.
650198092Srdivacky  bool isKeywordSelector() const {
651198092Srdivacky    return getIdentifierInfoFlag() != ZeroArg;
652193326Sed  }
653198092Srdivacky  bool isUnarySelector() const {
654193326Sed    return getIdentifierInfoFlag() == ZeroArg;
655193326Sed  }
/// Presumably the number of argument slots of this selector; the slot
/// accessors document their index bound in terms of it. Only declared here.
656193326Sed  unsigned getNumArgs() const;
657218893Sdim
658218893Sdim
// NOTE(review): "unary" in the \param text follows isUnarySelector(), which
// tests for ZeroArg (presumably a selector without arguments); confirm against
// the out-of-line definition.
659218893Sdim  /// \brief Retrieve the identifier at a given position in the selector.
660218893Sdim  ///
661218893Sdim  /// Note that the identifier pointer returned may be NULL. Clients that only
662218893Sdim  /// care about the text of the identifier string, and not the specific,
663218893Sdim  /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
664218893Sdim  /// an empty string when the identifier pointer would be NULL.
665218893Sdim  ///
666218893Sdim  /// \param argIndex The index for which we want to retrieve the identifier.
667218893Sdim  /// This index shall be less than \c getNumArgs() unless this is a unary
668218893Sdim  /// selector, in which case 0 is the only permissible value.
669218893Sdim  ///
670218893Sdim  /// \returns the uniqued identifier for this slot, or NULL if this slot has
671218893Sdim  /// no corresponding identifier.
672193326Sed  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
673218893Sdim
// NOTE(review): "unary" in the \param text follows isUnarySelector(), which
// tests for ZeroArg (presumably a selector without arguments); confirm against
// the out-of-line definition.
674218893Sdim  /// \brief Retrieve the name at a given position in the selector.
675218893Sdim  ///
676218893Sdim  /// \param argIndex The index for which we want to retrieve the name.
677218893Sdim  /// This index shall be less than \c getNumArgs() unless this is a unary
678218893Sdim  /// selector, in which case 0 is the only permissible value.
679218893Sdim  ///
680218893Sdim  /// \returns the name for this slot, which may be the empty string if no
681218893Sdim  /// name was supplied.
682226633Sdim  StringRef getNameForSlot(unsigned argIndex) const;
683218893Sdim
684239462Sdim  /// \brief Derive the full selector name (e.g. "foo:bar:") and return
685193326Sed  /// it as a std::string (returned by value).
686239462Sdim  // FIXME: Add a print method that uses a raw_ostream.
687193326Sed  std::string getAsString() const;
688198092Srdivacky
689239462Sdim  /// \brief Derive the conventional family of this method.
690221345Sdim  ObjCMethodFamily getMethodFamily() const {
691221345Sdim    return getMethodFamilyImpl(*this);
692221345Sdim  }
693221345Sdim
694193326Sed  static Selector getEmptyMarker() {
695193326Sed    return Selector(uintptr_t(-1));
696193326Sed  }
697193326Sed  static Selector getTombstoneMarker() {
698193326Sed    return Selector(uintptr_t(-2));
699193326Sed  }
700193326Sed};
701193326Sed
702239462Sdim/// \brief This table allows us to fully hide how we implement
703193326Sed/// multi-keyword caching.
704193326Sedclass SelectorTable {
705193326Sed  void *Impl;  // Actually a SelectorTableImpl
706243830Sdim  SelectorTable(const SelectorTable &) LLVM_DELETED_FUNCTION;
707243830Sdim  void operator=(const SelectorTable &) LLVM_DELETED_FUNCTION;
708193326Sedpublic:
709193326Sed  SelectorTable();
710193326Sed  ~SelectorTable();
711193326Sed
712239462Sdim  /// \brief Can create any sort of selector.
713239462Sdim  ///
714239462Sdim  /// \p NumArgs indicates whether this is a no argument selector "foo", a
715239462Sdim  /// single argument selector "foo:" or multi-argument "foo:bar:".
716193326Sed  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
717198092Srdivacky
718193326Sed  Selector getUnarySelector(IdentifierInfo *ID) {
719193326Sed    return Selector(ID, 1);
720193326Sed  }
721193326Sed  Selector getNullarySelector(IdentifierInfo *ID) {
722193326Sed    return Selector(ID, 0);
723193326Sed  }
724193326Sed
725239462Sdim  /// \brief Return the total amount of memory allocated for managing selectors.
726221345Sdim  size_t getTotalMemory() const;
727221345Sdim
728239462Sdim  /// \brief Return the setter name for the given identifier.
729239462Sdim  ///
730239462Sdim  /// This is "set" + \p Name where the initial character of \p Name
731193326Sed  /// has been capitalized.
732193326Sed  static Selector constructSetterName(IdentifierTable &Idents,
733193326Sed                                      SelectorTable &SelTable,
734234353Sdim                                      const IdentifierInfo *Name);
735193326Sed};
736193326Sed
737193326Sed/// DeclarationNameExtra - Common base of the MultiKeywordSelector,
738193326Sed/// CXXSpecialName, and CXXOperatorIdName classes, all of which are
739193326Sed/// private classes that describe different kinds of names.
740193326Sedclass DeclarationNameExtra {
741193326Sedpublic:
742193326Sed  /// ExtraKind - The kind of "extra" information stored in the
743193326Sed  /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
744193326Sed  /// how these enumerator values are used.
745193326Sed  enum ExtraKind {
746193326Sed    CXXConstructor = 0,
747193326Sed    CXXDestructor,
748193326Sed    CXXConversionFunction,
// Each OVERLOADED_OPERATOR entry in OperatorKinds.def expands to one
// CXXOperator<Name> enumerator here.
749193326Sed#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
750193326Sed    CXXOperator##Name,
751193326Sed#include "clang/Basic/OperatorKinds.def"
752199990Srdivacky    CXXLiteralOperator,
753193326Sed    CXXUsingDirective,
// Kept last: ExtraKindOrNumArgs encodes selector argument counts as
// NUM_EXTRA_KINDS+NumArgs.
754193326Sed    NUM_EXTRA_KINDS
755193326Sed  };
756193326Sed
757193326Sed  /// ExtraKindOrNumArgs - Either the kind of C++ special name or
758193326Sed  /// operator-id (if the value is one of the CXX* enumerators of
759193326Sed  /// ExtraKind), in which case the DeclarationNameExtra is also a
760199990Srdivacky  /// CXXSpecialName (for CXXConstructor, CXXDestructor, or
761199990Srdivacky  /// CXXConversionFunction), a CXXOperatorIdName, or a CXXLiteralOperatorName;
762199990Srdivacky  /// it may also be the name common to C++ using-directives (CXXUsingDirective);
763199990Srdivacky  /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
764193326Sed  /// arguments in the Objective-C selector, in which case the
765193326Sed  /// DeclarationNameExtra is also a MultiKeywordSelector.
766193326Sed  unsigned ExtraKindOrNumArgs;
767193326Sed};
768193326Sed
769193326Sed}  // end namespace clang
770193326Sed
771193326Sednamespace llvm {
772193326Sed/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
773193326Sed/// DenseSets.
774193326Sedtemplate <>
775193326Sedstruct DenseMapInfo<clang::Selector> {
776193326Sed  static inline clang::Selector getEmptyKey() {
777193326Sed    return clang::Selector::getEmptyMarker();
778193326Sed  }
779193326Sed  static inline clang::Selector getTombstoneKey() {
780198092Srdivacky    return clang::Selector::getTombstoneMarker();
781193326Sed  }
782198092Srdivacky
783193326Sed  static unsigned getHashValue(clang::Selector S);
784198092Srdivacky
785193326Sed  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
786193326Sed    return LHS == RHS;
787193326Sed  }
788193326Sed};
789205219Srdivacky
790200583Srdivackytemplate <>
791200583Srdivackystruct isPodLike<clang::Selector> { static const bool value = true; };
792193326Sed
793218893Sdimtemplate<>
794218893Sdimclass PointerLikeTypeTraits<clang::Selector> {
795218893Sdimpublic:
796218893Sdim  static inline const void *getAsVoidPointer(clang::Selector P) {
797218893Sdim    return P.getAsOpaquePtr();
798218893Sdim  }
799218893Sdim  static inline clang::Selector getFromVoidPointer(const void *P) {
800218893Sdim    return clang::Selector(reinterpret_cast<uintptr_t>(P));
801218893Sdim  }
802218893Sdim  enum { NumLowBitsAvailable = 0 };
803218893Sdim};
804200583Srdivacky
805193326Sed// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
806193326Sed// are not guaranteed to be 8-byte aligned.
807193326Sedtemplate<>
808193326Sedclass PointerLikeTypeTraits<clang::IdentifierInfo*> {
809193326Sedpublic:
810193326Sed  static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
811198092Srdivacky    return P;
812193326Sed  }
813193326Sed  static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
814193326Sed    return static_cast<clang::IdentifierInfo*>(P);
815193326Sed  }
816193326Sed  enum { NumLowBitsAvailable = 1 };
817193326Sed};
818193326Sed
819193326Sedtemplate<>
820193326Sedclass PointerLikeTypeTraits<const clang::IdentifierInfo*> {
821193326Sedpublic:
822193326Sed  static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
823198092Srdivacky    return P;
824193326Sed  }
825193326Sed  static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
826193326Sed    return static_cast<const clang::IdentifierInfo*>(P);
827193326Sed  }
828193326Sed  enum { NumLowBitsAvailable = 1 };
829193326Sed};
830193326Sed
831193326Sed}  // end namespace llvm
832193326Sed#endif
833