1193326Sed//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2193326Sed//
3193326Sed//                     The LLVM Compiler Infrastructure
4193326Sed//
5193326Sed// This file is distributed under the University of Illinois Open Source
6193326Sed// License. See LICENSE.TXT for details.
7193326Sed//
8193326Sed//===----------------------------------------------------------------------===//
9239462Sdim///
10239462Sdim/// \file
11239462Sdim/// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12239462Sdim/// clang::Selector interfaces.
13239462Sdim///
14193326Sed//===----------------------------------------------------------------------===//
15193326Sed
16193326Sed#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17193326Sed#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18193326Sed
19249423Sdim#include "clang/Basic/LLVM.h"
20193326Sed#include "clang/Basic/OperatorKinds.h"
21193326Sed#include "clang/Basic/TokenKinds.h"
22263508Sdim#include "llvm/ADT/SmallString.h"
23193326Sed#include "llvm/ADT/StringMap.h"
24205219Srdivacky#include "llvm/ADT/StringRef.h"
25193326Sed#include "llvm/Support/PointerLikeTypeTraits.h"
26218893Sdim#include <cassert>
27198092Srdivacky#include <string>
28193326Sed
29193326Sednamespace llvm {
30193326Sed  template <typename T> struct DenseMapInfo;
31193326Sed}
32193326Sed
33193326Sednamespace clang {
34193326Sed  class LangOptions;
35193326Sed  class IdentifierInfo;
36193326Sed  class IdentifierTable;
37193326Sed  class SourceLocation;
38193326Sed  class MultiKeywordSelector; // private class used by Selector
39193326Sed  class DeclarationName;      // AST class that stores declaration names
40193326Sed
41239462Sdim  /// \brief A simple pair of identifier info and location.
42193326Sed  typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
43198092Srdivacky
44198092Srdivacky
45239462Sdim/// One of these records is kept for each identifier that
46239462Sdim/// is lexed.  This contains information about whether the token was \#define'd,
47193326Sed/// is a language keyword, or if it is a front-end token of some sort (e.g. a
48193326Sed/// variable or function name).  The preprocessor keeps this information in a
49198092Srdivacky/// set, and all tok::identifier tokens have a pointer to one of these.
50193326Sedclass IdentifierInfo {
51226633Sdim  unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
52193326Sed  // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
53193326Sed  // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
54193326Sed  // are for builtins.
55218893Sdim  unsigned ObjCOrBuiltinID    :11;
56193326Sed  bool HasMacro               : 1; // True if there is a #define for this.
57243830Sdim  bool HadMacro               : 1; // True if there was a #define for this.
58193326Sed  bool IsExtension            : 1; // True if identifier is a lang extension.
59226633Sdim  bool IsCXX11CompatKeyword   : 1; // True if identifier is a keyword in C++11.
60193326Sed  bool IsPoisoned             : 1; // True if identifier is poisoned.
61193326Sed  bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
62193326Sed  bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
63234353Sdim  bool IsFromAST              : 1; // True if identifier was loaded (at least
64234353Sdim                                   // partially) from an AST file.
65234353Sdim  bool ChangedAfterLoad       : 1; // True if identifier has changed from the
66234353Sdim                                   // definition loaded from an AST file.
67212904Sdim  bool RevertedTokenID        : 1; // True if RevertTokenIDToIdentifier was
68212904Sdim                                   // called.
69234353Sdim  bool OutOfDate              : 1; // True if there may be additional
70234353Sdim                                   // information about this identifier
71234353Sdim                                   // stored externally.
72239462Sdim  bool IsModulesImport        : 1; // True if this is the 'import' contextual
73234353Sdim                                   // keyword.
74243830Sdim  // 32-bit word is filled.
75243830Sdim
76193326Sed  void *FETokenInfo;               // Managed by the language front-end.
77193326Sed  llvm::StringMapEntry<IdentifierInfo*> *Entry;
78198092Srdivacky
79243830Sdim  IdentifierInfo(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
80243830Sdim  void operator=(const IdentifierInfo&) LLVM_DELETED_FUNCTION;
81193326Sed
82198092Srdivacky  friend class IdentifierTable;
83218893Sdim
84193326Sedpublic:
85193326Sed  IdentifierInfo();
86193326Sed
87198092Srdivacky
88239462Sdim  /// \brief Return true if this is the identifier for the specified string.
89239462Sdim  ///
90193326Sed  /// This is intended to be used for string literals only: II->isStr("foo").
91193326Sed  template <std::size_t StrLen>
92193326Sed  bool isStr(const char (&Str)[StrLen]) const {
93198398Srdivacky    return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1);
94193326Sed  }
95198092Srdivacky
96239462Sdim  /// \brief Return the beginning of the actual null-terminated string for this
97239462Sdim  /// identifier.
98193326Sed  ///
99198398Srdivacky  const char *getNameStart() const {
100193326Sed    if (Entry) return Entry->getKeyData();
101193326Sed    // FIXME: This is gross. It would be best not to embed specific details
102193326Sed    // of the PTH file format here.
103198092Srdivacky    // The 'this' pointer really points to a
104193326Sed    // std::pair<IdentifierInfo, const char*>, where internal pointer
105193326Sed    // points to the external string data.
106210299Sed    typedef std::pair<IdentifierInfo, const char*> actualtype;
107210299Sed    return ((const actualtype*) this)->second;
108193326Sed  }
109198092Srdivacky
110239462Sdim  /// \brief Efficiently return the length of this identifier info.
111193326Sed  ///
112193326Sed  unsigned getLength() const {
113193326Sed    if (Entry) return Entry->getKeyLength();
114193326Sed    // FIXME: This is gross. It would be best not to embed specific details
115193326Sed    // of the PTH file format here.
116198092Srdivacky    // The 'this' pointer really points to a
117193326Sed    // std::pair<IdentifierInfo, const char*>, where internal pointer
118193326Sed    // points to the external string data.
119210299Sed    typedef std::pair<IdentifierInfo, const char*> actualtype;
120210299Sed    const char* p = ((const actualtype*) this)->second - 2;
121198398Srdivacky    return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
122193326Sed  }
123198092Srdivacky
124239462Sdim  /// \brief Return the actual identifier string.
125226633Sdim  StringRef getName() const {
126226633Sdim    return StringRef(getNameStart(), getLength());
127198398Srdivacky  }
128198398Srdivacky
129239462Sdim  /// \brief Return true if this identifier is \#defined to some other value.
130193326Sed  bool hasMacroDefinition() const {
131193326Sed    return HasMacro;
132193326Sed  }
133193326Sed  void setHasMacroDefinition(bool Val) {
134193326Sed    if (HasMacro == Val) return;
135198092Srdivacky
136193326Sed    HasMacro = Val;
137243830Sdim    if (Val) {
138193326Sed      NeedsHandleIdentifier = 1;
139243830Sdim      HadMacro = true;
140243830Sdim    } else {
141193326Sed      RecomputeNeedsHandleIdentifier();
142243830Sdim    }
143193326Sed  }
144243830Sdim  /// \brief Returns true if this identifier was \#defined to some value at any
145243830Sdim  /// moment. In this case there should be an entry for the identifier in the
146243830Sdim  /// macro history table in Preprocessor.
147243830Sdim  bool hadMacroDefinition() const {
148243830Sdim    return HadMacro;
149243830Sdim  }
150198092Srdivacky
151263508Sdim  /// If this is a source-language token (e.g. 'for'), this API
152193326Sed  /// can be used to cause the lexer to map identifiers to source-language
153193326Sed  /// tokens.
154193326Sed  tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
155198092Srdivacky
156212904Sdim  /// \brief True if RevertTokenIDToIdentifier() was called.
157212904Sdim  bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
158212904Sdim
159212904Sdim  /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
160212904Sdim  /// compatibility.
161212904Sdim  ///
162212904Sdim  /// TokenID is normally read-only but there are 2 instances where we revert it
163212904Sdim  /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
164212904Sdim  /// using this method so we can inform serialization about it.
165212904Sdim  void RevertTokenIDToIdentifier() {
166212904Sdim    assert(TokenID != tok::identifier && "Already at tok::identifier");
167212904Sdim    TokenID = tok::identifier;
168212904Sdim    RevertedTokenID = true;
169212904Sdim  }
170212904Sdim
171239462Sdim  /// \brief Return the preprocessor keyword ID for this identifier.
172239462Sdim  ///
173193326Sed  /// For example, "define" will return tok::pp_define.
174193326Sed  tok::PPKeywordKind getPPKeywordID() const;
175198092Srdivacky
176239462Sdim  /// \brief Return the Objective-C keyword ID for the this identifier.
177239462Sdim  ///
178239462Sdim  /// For example, 'class' will return tok::objc_class if ObjC is enabled.
179193326Sed  tok::ObjCKeywordKind getObjCKeywordID() const {
180198092Srdivacky    if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
181193326Sed      return tok::ObjCKeywordKind(ObjCOrBuiltinID);
182193326Sed    else
183193326Sed      return tok::objc_not_keyword;
184193326Sed  }
185193326Sed  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
186193326Sed
187263508Sdim  /// \brief Return a value indicating whether this is a builtin function.
188263508Sdim  ///
189263508Sdim  /// 0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
190193326Sed  /// 2+ are specific builtin functions.
191198092Srdivacky  unsigned getBuiltinID() const {
192193326Sed    if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
193198092Srdivacky      return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
194193326Sed    else
195193326Sed      return 0;
196193326Sed  }
197193326Sed  void setBuiltinID(unsigned ID) {
198193326Sed    ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
199198092Srdivacky    assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
200193326Sed           && "ID too large for field!");
201193326Sed  }
202193326Sed
203193326Sed  unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
204193326Sed  void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
205193326Sed
206193326Sed  /// get/setExtension - Initialize information about whether or not this
207193326Sed  /// language token is an extension.  This controls extension warnings, and is
208193326Sed  /// only valid if a custom token ID is set.
209193326Sed  bool isExtensionToken() const { return IsExtension; }
210193326Sed  void setIsExtensionToken(bool Val) {
211193326Sed    IsExtension = Val;
212193326Sed    if (Val)
213193326Sed      NeedsHandleIdentifier = 1;
214193326Sed    else
215193326Sed      RecomputeNeedsHandleIdentifier();
216193326Sed  }
217198092Srdivacky
218226633Sdim  /// is/setIsCXX11CompatKeyword - Initialize information about whether or not
219226633Sdim  /// this language token is a keyword in C++11. This controls compatibility
220226633Sdim  /// warnings, and is only true when not parsing C++11. Once a compatibility
221226633Sdim  /// problem has been diagnosed with this keyword, the flag will be cleared.
222226633Sdim  bool isCXX11CompatKeyword() const { return IsCXX11CompatKeyword; }
223226633Sdim  void setIsCXX11CompatKeyword(bool Val) {
224226633Sdim    IsCXX11CompatKeyword = Val;
225226633Sdim    if (Val)
226226633Sdim      NeedsHandleIdentifier = 1;
227226633Sdim    else
228226633Sdim      RecomputeNeedsHandleIdentifier();
229226633Sdim  }
230226633Sdim
231193326Sed  /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
232193326Sed  /// Preprocessor will emit an error every time this token is used.
233193326Sed  void setIsPoisoned(bool Value = true) {
234193326Sed    IsPoisoned = Value;
235193326Sed    if (Value)
236193326Sed      NeedsHandleIdentifier = 1;
237193326Sed    else
238193326Sed      RecomputeNeedsHandleIdentifier();
239193326Sed  }
240198092Srdivacky
241263508Sdim  /// \brief Return true if this token has been poisoned.
242193326Sed  bool isPoisoned() const { return IsPoisoned; }
243198092Srdivacky
244193326Sed  /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
245193326Sed  /// this identifier is a C++ alternate representation of an operator.
246193326Sed  void setIsCPlusPlusOperatorKeyword(bool Val = true) {
247193326Sed    IsCPPOperatorKeyword = Val;
248193326Sed    if (Val)
249193326Sed      NeedsHandleIdentifier = 1;
250193326Sed    else
251193326Sed      RecomputeNeedsHandleIdentifier();
252193326Sed  }
253193326Sed  bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
254193326Sed
255193326Sed  /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
256193326Sed  /// associate arbitrary metadata with this token.
257193326Sed  template<typename T>
258193326Sed  T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
259193326Sed  void setFETokenInfo(void *T) { FETokenInfo = T; }
260193326Sed
261263508Sdim  /// \brief Return true if the Preprocessor::HandleIdentifier must be called
262263508Sdim  /// on a token of this identifier.
263263508Sdim  ///
264263508Sdim  /// If this returns false, we know that HandleIdentifier will not affect
265263508Sdim  /// the token.
266193326Sed  bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
267198092Srdivacky
268263508Sdim  /// \brief Return true if the identifier in its current state was loaded
269212904Sdim  /// from an AST file.
270212904Sdim  bool isFromAST() const { return IsFromAST; }
271212904Sdim
272234353Sdim  void setIsFromAST() { IsFromAST = true; }
273212904Sdim
274234353Sdim  /// \brief Determine whether this identifier has changed since it was loaded
275234353Sdim  /// from an AST file.
276234353Sdim  bool hasChangedSinceDeserialization() const {
277234353Sdim    return ChangedAfterLoad;
278234353Sdim  }
279234353Sdim
280234353Sdim  /// \brief Note that this identifier has changed since it was loaded from
281234353Sdim  /// an AST file.
282234353Sdim  void setChangedSinceDeserialization() {
283234353Sdim    ChangedAfterLoad = true;
284234353Sdim  }
285234353Sdim
286234353Sdim  /// \brief Determine whether the information for this identifier is out of
287234353Sdim  /// date with respect to the external source.
288234353Sdim  bool isOutOfDate() const { return OutOfDate; }
289234353Sdim
290234353Sdim  /// \brief Set whether the information for this identifier is out of
291234353Sdim  /// date with respect to the external source.
292234353Sdim  void setOutOfDate(bool OOD) {
293234353Sdim    OutOfDate = OOD;
294234353Sdim    if (OOD)
295234353Sdim      NeedsHandleIdentifier = true;
296234353Sdim    else
297234353Sdim      RecomputeNeedsHandleIdentifier();
298234353Sdim  }
299234353Sdim
300263508Sdim  /// \brief Determine whether this is the contextual keyword \c import.
301234353Sdim  bool isModulesImport() const { return IsModulesImport; }
302234353Sdim
303263508Sdim  /// \brief Set whether this identifier is the contextual keyword \c import.
304234353Sdim  void setModulesImport(bool I) {
305234353Sdim    IsModulesImport = I;
306234353Sdim    if (I)
307234353Sdim      NeedsHandleIdentifier = true;
308234353Sdim    else
309234353Sdim      RecomputeNeedsHandleIdentifier();
310234353Sdim  }
311234353Sdim
312193326Sedprivate:
313263508Sdim  /// The Preprocessor::HandleIdentifier does several special (but rare)
314263508Sdim  /// things to identifiers of various sorts.  For example, it changes the
315263508Sdim  /// \c for keyword token from tok::identifier to tok::for.
316193326Sed  ///
317193326Sed  /// This method is very tied to the definition of HandleIdentifier.  Any
318193326Sed  /// change to it should be reflected here.
319193326Sed  void RecomputeNeedsHandleIdentifier() {
320193326Sed    NeedsHandleIdentifier =
321193326Sed      (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
322234353Sdim       isExtensionToken() | isCXX11CompatKeyword() || isOutOfDate() ||
323234353Sdim       isModulesImport());
324193326Sed  }
325193326Sed};
326193326Sed
327263508Sdim/// \brief An RAII object for [un]poisoning an identifier within a scope.
328263508Sdim///
329263508Sdim/// \p II is allowed to be null, in which case objects of this type have
330263508Sdim/// no effect.
331221345Sdimclass PoisonIdentifierRAIIObject {
332221345Sdim  IdentifierInfo *const II;
333221345Sdim  const bool OldValue;
334221345Sdimpublic:
335221345Sdim  PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
336221345Sdim    : II(II), OldValue(II ? II->isPoisoned() : false) {
337221345Sdim    if(II)
338221345Sdim      II->setIsPoisoned(NewValue);
339221345Sdim  }
340221345Sdim
341221345Sdim  ~PoisonIdentifierRAIIObject() {
342221345Sdim    if(II)
343221345Sdim      II->setIsPoisoned(OldValue);
344221345Sdim  }
345221345Sdim};
346221345Sdim
347218893Sdim/// \brief An iterator that walks over all of the known identifiers
348218893Sdim/// in the lookup table.
349218893Sdim///
350218893Sdim/// Since this iterator uses an abstract interface via virtual
351218893Sdim/// functions, it uses an object-oriented interface rather than the
352218893Sdim/// more standard C++ STL iterator interface. In this OO-style
353218893Sdim/// iteration, the single function \c Next() provides dereference,
354218893Sdim/// advance, and end-of-sequence checking in a single
355218893Sdim/// operation. Subclasses of this iterator type will provide the
356218893Sdim/// actual functionality.
357218893Sdimclass IdentifierIterator {
358218893Sdimprivate:
359243830Sdim  IdentifierIterator(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
360243830Sdim  void operator=(const IdentifierIterator &) LLVM_DELETED_FUNCTION;
361218893Sdim
362218893Sdimprotected:
363218893Sdim  IdentifierIterator() { }
364218893Sdim
365218893Sdimpublic:
366218893Sdim  virtual ~IdentifierIterator();
367218893Sdim
368218893Sdim  /// \brief Retrieve the next string in the identifier table and
369218893Sdim  /// advances the iterator for the following string.
370218893Sdim  ///
371218893Sdim  /// \returns The next string in the identifier table. If there is
372226633Sdim  /// no such string, returns an empty \c StringRef.
373226633Sdim  virtual StringRef Next() = 0;
374218893Sdim};
375218893Sdim
376263508Sdim/// \brief Provides lookups to, and iteration over, IdentiferInfo objects.
377193326Sedclass IdentifierInfoLookup {
378193326Sedpublic:
379193326Sed  virtual ~IdentifierInfoLookup();
380198092Srdivacky
381263508Sdim  /// \brief Return the IdentifierInfo for the specified named identifier.
382263508Sdim  ///
383263508Sdim  /// Unlike the version in IdentifierTable, this returns a pointer instead
384263508Sdim  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
385263508Sdim  /// be found.
386226633Sdim  virtual IdentifierInfo* get(StringRef Name) = 0;
387218893Sdim
388218893Sdim  /// \brief Retrieve an iterator into the set of all identifiers
389218893Sdim  /// known to this identifier lookup source.
390218893Sdim  ///
391218893Sdim  /// This routine provides access to all of the identifiers known to
392218893Sdim  /// the identifier lookup, allowing access to the contents of the
393218893Sdim  /// identifiers without introducing the overhead of constructing
394218893Sdim  /// IdentifierInfo objects for each.
395218893Sdim  ///
396218893Sdim  /// \returns A new iterator into the set of known identifiers. The
397218893Sdim  /// caller is responsible for deleting this iterator.
398251662Sdim  virtual IdentifierIterator *getIdentifiers();
399198092Srdivacky};
400193326Sed
401193326Sed/// \brief An abstract class used to resolve numerical identifier
402193326Sed/// references (meaningful only to some external source) into
403193326Sed/// IdentifierInfo pointers.
404193326Sedclass ExternalIdentifierLookup {
405193326Sedpublic:
406193326Sed  virtual ~ExternalIdentifierLookup();
407193326Sed
408193326Sed  /// \brief Return the identifier associated with the given ID number.
409193326Sed  ///
410193326Sed  /// The ID 0 is associated with the NULL identifier.
411193326Sed  virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0;
412193326Sed};
413193326Sed
414239462Sdim/// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
415239462Sdim///
416239462Sdim/// This has no other purpose, but this is an extremely performance-critical
417239462Sdim/// piece of the code, as each occurrence of every identifier goes through
418239462Sdim/// here when lexed.
419193326Sedclass IdentifierTable {
420193326Sed  // Shark shows that using MallocAllocator is *much* slower than using this
421193326Sed  // BumpPtrAllocator!
422193326Sed  typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
423193326Sed  HashTableTy HashTable;
424198092Srdivacky
425193326Sed  IdentifierInfoLookup* ExternalLookup;
426193326Sed
427193326Sedpublic:
428239462Sdim  /// \brief Create the identifier table, populating it with info about the
429239462Sdim  /// language keywords for the language specified by \p LangOpts.
430193326Sed  IdentifierTable(const LangOptions &LangOpts,
431193326Sed                  IdentifierInfoLookup* externalLookup = 0);
432198092Srdivacky
433193326Sed  /// \brief Set the external identifier lookup mechanism.
434193326Sed  void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
435193326Sed    ExternalLookup = IILookup;
436193326Sed  }
437193326Sed
438218893Sdim  /// \brief Retrieve the external identifier lookup object, if any.
439218893Sdim  IdentifierInfoLookup *getExternalIdentifierLookup() const {
440218893Sdim    return ExternalLookup;
441218893Sdim  }
442218893Sdim
443193326Sed  llvm::BumpPtrAllocator& getAllocator() {
444193326Sed    return HashTable.getAllocator();
445193326Sed  }
446198092Srdivacky
447239462Sdim  /// \brief Return the identifier token info for the specified named
448239462Sdim  /// identifier.
449226633Sdim  IdentifierInfo &get(StringRef Name) {
450193326Sed    llvm::StringMapEntry<IdentifierInfo*> &Entry =
451205219Srdivacky      HashTable.GetOrCreateValue(Name);
452198092Srdivacky
453193326Sed    IdentifierInfo *II = Entry.getValue();
454193326Sed    if (II) return *II;
455198092Srdivacky
456193326Sed    // No entry; if we have an external lookup, look there first.
457193326Sed    if (ExternalLookup) {
458205219Srdivacky      II = ExternalLookup->get(Name);
459193326Sed      if (II) {
460193326Sed        // Cache in the StringMap for subsequent lookups.
461193326Sed        Entry.setValue(II);
462193326Sed        return *II;
463193326Sed      }
464193326Sed    }
465193326Sed
466193326Sed    // Lookups failed, make a new IdentifierInfo.
467193326Sed    void *Mem = getAllocator().Allocate<IdentifierInfo>();
468193326Sed    II = new (Mem) IdentifierInfo();
469193326Sed    Entry.setValue(II);
470193326Sed
471193326Sed    // Make sure getName() knows how to find the IdentifierInfo
472193326Sed    // contents.
473193326Sed    II->Entry = &Entry;
474193326Sed
475193326Sed    return *II;
476193326Sed  }
477198092Srdivacky
478226633Sdim  IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
479212904Sdim    IdentifierInfo &II = get(Name);
480212904Sdim    II.TokenID = TokenCode;
481226633Sdim    assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
482212904Sdim    return II;
483212904Sdim  }
484212904Sdim
485212904Sdim  /// \brief Gets an IdentifierInfo for the given name without consulting
486212904Sdim  ///        external sources.
487193326Sed  ///
488212904Sdim  /// This is a version of get() meant for external sources that want to
489212904Sdim  /// introduce or modify an identifier. If they called get(), they would
490212904Sdim  /// likely end up in a recursion.
491226633Sdim  IdentifierInfo &getOwn(StringRef Name) {
492193326Sed    llvm::StringMapEntry<IdentifierInfo*> &Entry =
493224145Sdim      HashTable.GetOrCreateValue(Name);
494198092Srdivacky
495193326Sed    IdentifierInfo *II = Entry.getValue();
496212904Sdim    if (!II) {
497198092Srdivacky
498212904Sdim      // Lookups failed, make a new IdentifierInfo.
499212904Sdim      void *Mem = getAllocator().Allocate<IdentifierInfo>();
500212904Sdim      II = new (Mem) IdentifierInfo();
501212904Sdim      Entry.setValue(II);
502193326Sed
503212904Sdim      // Make sure getName() knows how to find the IdentifierInfo
504212904Sdim      // contents.
505212904Sdim      II->Entry = &Entry;
506234353Sdim
507234353Sdim      // If this is the 'import' contextual keyword, mark it as such.
508234353Sdim      if (Name.equals("import"))
509234353Sdim        II->setModulesImport(true);
510212904Sdim    }
511193326Sed
512193326Sed    return *II;
513193326Sed  }
514193326Sed
515193326Sed  typedef HashTableTy::const_iterator iterator;
516193326Sed  typedef HashTableTy::const_iterator const_iterator;
517198092Srdivacky
518193326Sed  iterator begin() const { return HashTable.begin(); }
519193326Sed  iterator end() const   { return HashTable.end(); }
520193326Sed  unsigned size() const { return HashTable.size(); }
521198092Srdivacky
522239462Sdim  /// \brief Print some statistics to stderr that indicate how well the
523193326Sed  /// hashing is doing.
524193326Sed  void PrintStats() const;
525198092Srdivacky
526193326Sed  void AddKeywords(const LangOptions &LangOpts);
527193326Sed};
528193326Sed
/// \brief The family an Objective-C method belongs to.
///
/// The language attaches no inherent meaning to these families, yet they
/// are central enough in the existing implementations to merit direct AST
/// support.  Arbitrary methods could in theory be grouped into families;
/// the ones listed here concern allocation and retain-count management,
/// since those are the most "core" and the most likely to be useful to
/// diverse clients without extra information.
///
/// Selectors as well as actual method declarations can be classified into
/// families.  A family may impose restrictions beyond the selector name:
/// for instance, a method called '_init' that returns void is not in the
/// 'init' family (it would be if it returned 'id').  A method's family can
/// also be changed or removed explicitly.  Therefore the method's family
/// should be considered the single source of truth.
enum ObjCMethodFamily {
  /// \brief The method is in no particular family.
  OMF_None = 0,

  // Families whose selectors may have any arity, may carry any number of
  // leading underscores, and may continue with further CamelCase "words"
  // after the family name in the first selector chunk.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // Singleton families: each consists solely of the nullary selector of
  // the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,

  // The performSelector families.
  OMF_performSelector
};
573221345Sdim
/// \brief Number of bits that suffices to hold any ObjCMethodFamily
/// enumerator as well as InvalidObjCMethodFamily.
enum {
  ObjCMethodFamilyBitWidth = 4
};

/// \brief A value that is not a valid ObjCMethodFamily: the largest value
/// that fits in ObjCMethodFamilyBitWidth bits.
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
580221345Sdim
/// \brief Families of Objective-C methods that are 'instancetype'
/// candidates.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having that result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
593263508Sdim
594239462Sdim/// \brief Smart pointer class that efficiently represents Objective-C method
595239462Sdim/// names.
596239462Sdim///
597239462Sdim/// This class will either point to an IdentifierInfo or a
598193326Sed/// MultiKeywordSelector (which is private). This enables us to optimize
599198092Srdivacky/// selectors that take no arguments and selectors that take 1 argument, which
600193326Sed/// accounts for 78% of all selectors in Cocoa.h.
601193326Sedclass Selector {
602226633Sdim  friend class Diagnostic;
603198092Srdivacky
604193326Sed  enum IdentifierInfoFlag {
605239462Sdim    // Empty selector = 0.
606193326Sed    ZeroArg  = 0x1,
607193326Sed    OneArg   = 0x2,
608239462Sdim    MultiArg = 0x3,
609193326Sed    ArgFlags = ZeroArg|OneArg
610193326Sed  };
611193326Sed  uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
612198092Srdivacky
613193326Sed  Selector(IdentifierInfo *II, unsigned nArgs) {
614193326Sed    InfoPtr = reinterpret_cast<uintptr_t>(II);
615193326Sed    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
616193326Sed    assert(nArgs < 2 && "nArgs not equal to 0/1");
617193326Sed    InfoPtr |= nArgs+1;
618193326Sed  }
619193326Sed  Selector(MultiKeywordSelector *SI) {
620193326Sed    InfoPtr = reinterpret_cast<uintptr_t>(SI);
621193326Sed    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
622239462Sdim    InfoPtr |= MultiArg;
623193326Sed  }
624198092Srdivacky
625193326Sed  IdentifierInfo *getAsIdentifierInfo() const {
626239462Sdim    if (getIdentifierInfoFlag() < MultiArg)
627193326Sed      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
628193326Sed    return 0;
629193326Sed  }
630239462Sdim  MultiKeywordSelector *getMultiKeywordSelector() const {
631239462Sdim    return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
632239462Sdim  }
633239462Sdim
634193326Sed  unsigned getIdentifierInfoFlag() const {
635193326Sed    return InfoPtr & ArgFlags;
636193326Sed  }
637193326Sed
638221345Sdim  static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
639221345Sdim
640193326Sedpublic:
641193326Sed  friend class SelectorTable; // only the SelectorTable can create these
642193326Sed  friend class DeclarationName; // and the AST's DeclarationName.
643193326Sed
644193326Sed  /// The default ctor should only be used when creating data structures that
645193326Sed  ///  will contain selectors.
646193326Sed  Selector() : InfoPtr(0) {}
647193326Sed  Selector(uintptr_t V) : InfoPtr(V) {}
648193326Sed
649193326Sed  /// operator==/!= - Indicate whether the specified selectors are identical.
650193326Sed  bool operator==(Selector RHS) const {
651193326Sed    return InfoPtr == RHS.InfoPtr;
652193326Sed  }
653193326Sed  bool operator!=(Selector RHS) const {
654193326Sed    return InfoPtr != RHS.InfoPtr;
655193326Sed  }
656193326Sed  void *getAsOpaquePtr() const {
657193326Sed    return reinterpret_cast<void*>(InfoPtr);
658193326Sed  }
659193326Sed
660193326Sed  /// \brief Determine whether this is the empty selector.
661193326Sed  bool isNull() const { return InfoPtr == 0; }
662193326Sed
663193326Sed  // Predicates to identify the selector type.
// Both compare the tag from getIdentifierInfoFlag() against ZeroArg:
// isUnarySelector() is true when the tag equals ZeroArg, isKeywordSelector()
// when it does not, so exactly one of the two holds for any selector.
664198092Srdivacky  bool isKeywordSelector() const {
665198092Srdivacky    return getIdentifierInfoFlag() != ZeroArg;
666193326Sed  }
667198092Srdivacky  bool isUnarySelector() const {
668193326Sed    return getIdentifierInfoFlag() == ZeroArg;
669193326Sed  }
/// \brief Return the number of arguments of this selector. Only the
/// declaration is visible here; the definition is out of line.
670193326Sed  unsigned getNumArgs() const;
671218893Sdim
672218893Sdim
673218893Sdim  /// \brief Retrieve the identifier at a given position in the selector.
674218893Sdim  ///
675218893Sdim  /// Note that the identifier pointer returned may be NULL. Clients that only
676218893Sdim  /// care about the text of the identifier string, and not the specific,
677218893Sdim  /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
678218893Sdim  /// an empty string when the identifier pointer would be NULL.
679218893Sdim  ///
680218893Sdim  /// \param argIndex The index for which we want to retrieve the identifier.
681218893Sdim  /// This index shall be less than \c getNumArgs() unless this is a keyword
682218893Sdim  /// selector, in which case 0 is the only permissible value.
/// NOTE(review): "keyword selector" above looks like it should read "unary
/// selector": isKeywordSelector() is true exactly when the tag is not
/// ZeroArg, so the index-0 exception only makes sense for the ZeroArg case.
/// Confirm against the out-of-line definition.
683218893Sdim  ///
684218893Sdim  /// \returns the uniqued identifier for this slot, or NULL if this slot has
685218893Sdim  /// no corresponding identifier.
686193326Sed  IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
687218893Sdim
688218893Sdim  /// \brief Retrieve the name at a given position in the selector.
689218893Sdim  ///
690218893Sdim  /// \param argIndex The index for which we want to retrieve the name.
691218893Sdim  /// This index shall be less than \c getNumArgs() unless this is a keyword
692218893Sdim  /// selector, in which case 0 is the only permissible value.
/// NOTE(review): "keyword selector" above looks like it should read "unary
/// selector": isKeywordSelector() is true exactly when the tag is not
/// ZeroArg, so the index-0 exception only makes sense for the ZeroArg case.
/// Confirm against the out-of-line definition.
693218893Sdim  ///
694218893Sdim  /// \returns the name for this slot, which may be the empty string if no
695218893Sdim  /// name was supplied.
696226633Sdim  StringRef getNameForSlot(unsigned argIndex) const;
697218893Sdim
698239462Sdim  /// \brief Derive the full selector name (e.g. "foo:bar:") and return
699193326Sed  /// it as a std::string.
700239462Sdim  // FIXME: Add a print method that uses a raw_ostream.
701193326Sed  std::string getAsString() const;
702198092Srdivacky
703239462Sdim  /// \brief Derive the conventional family of this method.
///
/// Thin inline wrapper: the work is done by the static, out-of-line
/// getMethodFamilyImpl(), which is called with a copy of this selector.
704221345Sdim  ObjCMethodFamily getMethodFamily() const {
705221345Sdim    return getMethodFamilyImpl(*this);
706221345Sdim  }
707221345Sdim
/// Return the sentinel selector whose InfoPtr is uintptr_t(-1). It is used as
/// the empty key by DenseMapInfo<clang::Selector>.
708193326Sed  static Selector getEmptyMarker() {
709193326Sed    return Selector(uintptr_t(-1));
710193326Sed  }
/// Return the sentinel selector whose InfoPtr is uintptr_t(-2). It is used as
/// the tombstone key by DenseMapInfo<clang::Selector>.
711193326Sed  static Selector getTombstoneMarker() {
712193326Sed    return Selector(uintptr_t(-2));
713193326Sed  }
714263508Sdim
/// \brief Map \p sel to an ObjCInstanceTypeFamily value. Only the declaration
/// is visible here; the classification rules live in the out-of-line
/// definition.
715263508Sdim  static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
716193326Sed};
717193326Sed
718239462Sdim/// \brief This table allows us to fully hide how we implement
719193326Sed/// multi-keyword caching.
///
/// The implementation object is held behind the opaque \c Impl pointer. The
/// copy constructor and copy assignment operator are declared private with
/// LLVM_DELETED_FUNCTION, so clients cannot copy a table.
720193326Sedclass SelectorTable {
721193326Sed  void *Impl;  // Actually a SelectorTableImpl
// Not copyable: declared private and marked LLVM_DELETED_FUNCTION.
722243830Sdim  SelectorTable(const SelectorTable &) LLVM_DELETED_FUNCTION;
723243830Sdim  void operator=(const SelectorTable &) LLVM_DELETED_FUNCTION;
724193326Sedpublic:
// Constructor and destructor are defined out of line, next to the
// implementation type that \c Impl points to.
725193326Sed  SelectorTable();
726193326Sed  ~SelectorTable();
727193326Sed
728239462Sdim  /// \brief Can create any sort of selector.
729239462Sdim  ///
730239462Sdim  /// \p NumArgs indicates whether this is a no argument selector "foo", a
731239462Sdim  /// single argument selector "foo:" or multi-argument "foo:bar:".
/// NOTE(review): \p IIV presumably points to the identifiers for the
/// selector's slots — confirm against the out-of-line definition.
732193326Sed  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
733198092Srdivacky
/// \brief Build a selector from \p ID with an argument count of 1.
///
/// NOTE(review): "unary" here means one argument, whereas
/// Selector::isUnarySelector() tests for the ZeroArg tag; the two names do
/// not appear to describe the same kind of selector.
734193326Sed  Selector getUnarySelector(IdentifierInfo *ID) {
735193326Sed    return Selector(ID, 1);
736193326Sed  }
/// \brief Build a selector from \p ID with an argument count of 0.
737193326Sed  Selector getNullarySelector(IdentifierInfo *ID) {
738193326Sed    return Selector(ID, 0);
739193326Sed  }
740193326Sed
741239462Sdim  /// \brief Return the total amount of memory allocated for managing selectors.
742221345Sdim  size_t getTotalMemory() const;
743221345Sdim
744263508Sdim  /// \brief Return the default setter name for the given identifier.
745239462Sdim  ///
746239462Sdim  /// This is "set" + \p Name where the initial character of \p Name
747193326Sed  /// has been capitalized.
748263508Sdim  static SmallString<64> constructSetterName(StringRef Name);
749263508Sdim
750263508Sdim  /// \brief Return the default setter selector for the given identifier.
751263508Sdim  ///
752263508Sdim  /// This is "set" + \p Name where the initial character of \p Name
753263508Sdim  /// has been capitalized.
///
/// \p Idents and \p SelTable are taken by non-const reference.
/// NOTE(review): presumably the new identifier and selector are created in
/// them — confirm against the out-of-line definition.
754263508Sdim  static Selector constructSetterSelector(IdentifierTable &Idents,
755263508Sdim                                          SelectorTable &SelTable,
756263508Sdim                                          const IdentifierInfo *Name);
757193326Sed};
758193326Sed
759193326Sed/// DeclarationNameExtra - Common base of the MultiKeywordSelector,
760193326Sed/// CXXSpecialName, and CXXOperatorIdName classes, all of which are
761193326Sed/// private classes that describe different kinds of names.
///
/// The only state held here is the ExtraKindOrNumArgs discriminator.
762193326Sedclass DeclarationNameExtra {
763193326Sedpublic:
764193326Sed  /// ExtraKind - The kind of "extra" information stored in the
765193326Sed  /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
766193326Sed  /// how these enumerator values are used.
///
/// One CXXOperator<Name> enumerator is generated for each
/// OVERLOADED_OPERATOR entry in clang/Basic/OperatorKinds.def.
/// NUM_EXTRA_KINDS is the last enumerator and so counts the ones before it.
767193326Sed  enum ExtraKind {
768193326Sed    CXXConstructor = 0,
769193326Sed    CXXDestructor,
770193326Sed    CXXConversionFunction,
771193326Sed#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
772193326Sed    CXXOperator##Name,
773193326Sed#include "clang/Basic/OperatorKinds.def"
774199990Srdivacky    CXXLiteralOperator,
775193326Sed    CXXUsingDirective,
776193326Sed    NUM_EXTRA_KINDS
777193326Sed  };
778193326Sed
779193326Sed  /// ExtraKindOrNumArgs - Either the kind of C++ special name or
780193326Sed  /// operator-id (if the value is one of the CXX* enumerators of
781193326Sed  /// ExtraKind), in which case the DeclarationNameExtra is also a
782199990Srdivacky  /// CXXSpecialName (for CXXConstructor, CXXDestructor, or
783199990Srdivacky  /// CXXConversionFunction), a CXXOperatorIdName, or a CXXLiteralOperatorName;
784199990Srdivacky  /// it may also be the name common to C++ using-directives (CXXUsingDirective);
785199990Srdivacky  /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
786193326Sed  /// arguments in the Objective-C selector, in which case the
787193326Sed  /// DeclarationNameExtra is also a MultiKeywordSelector.
788193326Sed  unsigned ExtraKindOrNumArgs;
789193326Sed};
790193326Sed
791193326Sed}  // end namespace clang
792193326Sed
793193326Sednamespace llvm {
794193326Sed/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
795193326Sed/// DenseSets.
///
/// The empty and tombstone keys are the sentinel selectors returned by
/// Selector::getEmptyMarker() and Selector::getTombstoneMarker(), and key
/// equality is Selector::operator==. getHashValue() is only declared here;
/// its definition is out of line.
796193326Sedtemplate <>
797193326Sedstruct DenseMapInfo<clang::Selector> {
798193326Sed  static inline clang::Selector getEmptyKey() {
799193326Sed    return clang::Selector::getEmptyMarker();
800193326Sed  }
801193326Sed  static inline clang::Selector getTombstoneKey() {
802198092Srdivacky    return clang::Selector::getTombstoneMarker();
803193326Sed  }
804198092Srdivacky
805193326Sed  static unsigned getHashValue(clang::Selector S);
806198092Srdivacky
807193326Sed  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
808193326Sed    return LHS == RHS;
809193326Sed  }
810193326Sed};
811205219Srdivacky
/// Specialize isPodLike so that its \c value is true for clang::Selector.
812200583Srdivackytemplate <>
813200583Srdivackystruct isPodLike<clang::Selector> { static const bool value = true; };
814193326Sed
/// PointerLikeTypeTraits for clang::Selector. A Selector is converted to a
/// void pointer with getAsOpaquePtr() and rebuilt with the Selector(uintptr_t)
/// constructor. NumLowBitsAvailable is 0, so no low bits are offered to
/// clients; Selector already keeps its own tag in the low bits of InfoPtr.
815218893Sdimtemplate<>
816218893Sdimclass PointerLikeTypeTraits<clang::Selector> {
817218893Sdimpublic:
818218893Sdim  static inline const void *getAsVoidPointer(clang::Selector P) {
819218893Sdim    return P.getAsOpaquePtr();
820218893Sdim  }
821218893Sdim  static inline clang::Selector getFromVoidPointer(const void *P) {
822218893Sdim    return clang::Selector(reinterpret_cast<uintptr_t>(P));
823218893Sdim  }
824218893Sdim  enum { NumLowBitsAvailable = 0 };
825218893Sdim};
826200583Srdivacky
827193326Sed// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
828193326Sed// are not guaranteed to be 8-byte aligned.
// Only one low bit is advertised as available (NumLowBitsAvailable = 1).
// The conversions to and from void* are plain pointer casts.
829193326Sedtemplate<>
830193326Sedclass PointerLikeTypeTraits<clang::IdentifierInfo*> {
831193326Sedpublic:
832193326Sed  static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
833198092Srdivacky    return P;
834193326Sed  }
835193326Sed  static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
836193326Sed    return static_cast<clang::IdentifierInfo*>(P);
837193326Sed  }
838193326Sed  enum { NumLowBitsAvailable = 1 };
839193326Sed};
840193326Sed
// PointerLikeTypeTraits for pointers to const IdentifierInfo. Only one low
// bit is advertised as available (NumLowBitsAvailable = 1), and the
// conversions to and from const void* are plain pointer casts.
841193326Sedtemplate<>
842193326Sedclass PointerLikeTypeTraits<const clang::IdentifierInfo*> {
843193326Sedpublic:
844193326Sed  static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
845198092Srdivacky    return P;
846193326Sed  }
847193326Sed  static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
848193326Sed    return static_cast<const clang::IdentifierInfo*>(P);
849193326Sed  }
850193326Sed  enum { NumLowBitsAvailable = 1 };
851193326Sed};
852193326Sed
853193326Sed}  // end namespace llvm
854193326Sed#endif
855