// IdentifierTable.h, revision 249423
1193326Sed//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9239462Sdim/// 10239462Sdim/// \file 11239462Sdim/// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and 12239462Sdim/// clang::Selector interfaces. 13239462Sdim/// 14193326Sed//===----------------------------------------------------------------------===// 15193326Sed 16193326Sed#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17193326Sed#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 18193326Sed 19249423Sdim#include "clang/Basic/LLVM.h" 20193326Sed#include "clang/Basic/OperatorKinds.h" 21193326Sed#include "clang/Basic/TokenKinds.h" 22193326Sed#include "llvm/ADT/StringMap.h" 23205219Srdivacky#include "llvm/ADT/StringRef.h" 24193326Sed#include "llvm/Support/PointerLikeTypeTraits.h" 25218893Sdim#include <cassert> 26198092Srdivacky#include <string> 27193326Sed 28193326Sednamespace llvm { 29193326Sed template <typename T> struct DenseMapInfo; 30193326Sed} 31193326Sed 32193326Sednamespace clang { 33193326Sed class LangOptions; 34193326Sed class IdentifierInfo; 35193326Sed class IdentifierTable; 36193326Sed class SourceLocation; 37193326Sed class MultiKeywordSelector; // private class used by Selector 38193326Sed class DeclarationName; // AST class that stores declaration names 39193326Sed 40239462Sdim /// \brief A simple pair of identifier info and location. 41193326Sed typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair; 42198092Srdivacky 43198092Srdivacky 44239462Sdim/// One of these records is kept for each identifier that 45239462Sdim/// is lexed. 
This contains information about whether the token was \#define'd, 46193326Sed/// is a language keyword, or if it is a front-end token of some sort (e.g. a 47193326Sed/// variable or function name). The preprocessor keeps this information in a 48198092Srdivacky/// set, and all tok::identifier tokens have a pointer to one of these. 49193326Sedclass IdentifierInfo { 50226633Sdim unsigned TokenID : 9; // Front-end token ID or tok::identifier. 51193326Sed // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 52193326Sed // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values 53193326Sed // are for builtins. 54218893Sdim unsigned ObjCOrBuiltinID :11; 55193326Sed bool HasMacro : 1; // True if there is a #define for this. 56243830Sdim bool HadMacro : 1; // True if there was a #define for this. 57193326Sed bool IsExtension : 1; // True if identifier is a lang extension. 58226633Sdim bool IsCXX11CompatKeyword : 1; // True if identifier is a keyword in C++11. 59193326Sed bool IsPoisoned : 1; // True if identifier is poisoned. 60193326Sed bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword. 61193326Sed bool NeedsHandleIdentifier : 1; // See "RecomputeNeedsHandleIdentifier". 62234353Sdim bool IsFromAST : 1; // True if identifier was loaded (at least 63234353Sdim // partially) from an AST file. 64234353Sdim bool ChangedAfterLoad : 1; // True if identifier has changed from the 65234353Sdim // definition loaded from an AST file. 66212904Sdim bool RevertedTokenID : 1; // True if RevertTokenIDToIdentifier was 67212904Sdim // called. 68234353Sdim bool OutOfDate : 1; // True if there may be additional 69234353Sdim // information about this identifier 70234353Sdim // stored externally. 71239462Sdim bool IsModulesImport : 1; // True if this is the 'import' contextual 72234353Sdim // keyword. 73243830Sdim // 32-bit word is filled. 74243830Sdim 75193326Sed void *FETokenInfo; // Managed by the language front-end. 
76193326Sed llvm::StringMapEntry<IdentifierInfo*> *Entry; 77198092Srdivacky 78243830Sdim IdentifierInfo(const IdentifierInfo&) LLVM_DELETED_FUNCTION; 79243830Sdim void operator=(const IdentifierInfo&) LLVM_DELETED_FUNCTION; 80193326Sed 81198092Srdivacky friend class IdentifierTable; 82218893Sdim 83193326Sedpublic: 84193326Sed IdentifierInfo(); 85193326Sed 86198092Srdivacky 87239462Sdim /// \brief Return true if this is the identifier for the specified string. 88239462Sdim /// 89193326Sed /// This is intended to be used for string literals only: II->isStr("foo"). 90193326Sed template <std::size_t StrLen> 91193326Sed bool isStr(const char (&Str)[StrLen]) const { 92198398Srdivacky return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1); 93193326Sed } 94198092Srdivacky 95239462Sdim /// \brief Return the beginning of the actual null-terminated string for this 96239462Sdim /// identifier. 97193326Sed /// 98198398Srdivacky const char *getNameStart() const { 99193326Sed if (Entry) return Entry->getKeyData(); 100193326Sed // FIXME: This is gross. It would be best not to embed specific details 101193326Sed // of the PTH file format here. 102198092Srdivacky // The 'this' pointer really points to a 103193326Sed // std::pair<IdentifierInfo, const char*>, where internal pointer 104193326Sed // points to the external string data. 105210299Sed typedef std::pair<IdentifierInfo, const char*> actualtype; 106210299Sed return ((const actualtype*) this)->second; 107193326Sed } 108198092Srdivacky 109239462Sdim /// \brief Efficiently return the length of this identifier info. 110193326Sed /// 111193326Sed unsigned getLength() const { 112193326Sed if (Entry) return Entry->getKeyLength(); 113193326Sed // FIXME: This is gross. It would be best not to embed specific details 114193326Sed // of the PTH file format here. 
115198092Srdivacky // The 'this' pointer really points to a 116193326Sed // std::pair<IdentifierInfo, const char*>, where internal pointer 117193326Sed // points to the external string data. 118210299Sed typedef std::pair<IdentifierInfo, const char*> actualtype; 119210299Sed const char* p = ((const actualtype*) this)->second - 2; 120198398Srdivacky return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1; 121193326Sed } 122198092Srdivacky 123239462Sdim /// \brief Return the actual identifier string. 124226633Sdim StringRef getName() const { 125226633Sdim return StringRef(getNameStart(), getLength()); 126198398Srdivacky } 127198398Srdivacky 128239462Sdim /// \brief Return true if this identifier is \#defined to some other value. 129193326Sed bool hasMacroDefinition() const { 130193326Sed return HasMacro; 131193326Sed } 132193326Sed void setHasMacroDefinition(bool Val) { 133193326Sed if (HasMacro == Val) return; 134198092Srdivacky 135193326Sed HasMacro = Val; 136243830Sdim if (Val) { 137193326Sed NeedsHandleIdentifier = 1; 138243830Sdim HadMacro = true; 139243830Sdim } else { 140193326Sed RecomputeNeedsHandleIdentifier(); 141243830Sdim } 142193326Sed } 143243830Sdim /// \brief Returns true if this identifier was \#defined to some value at any 144243830Sdim /// moment. In this case there should be an entry for the identifier in the 145243830Sdim /// macro history table in Preprocessor. 146243830Sdim bool hadMacroDefinition() const { 147243830Sdim return HadMacro; 148243830Sdim } 149198092Srdivacky 150212904Sdim /// getTokenID - If this is a source-language token (e.g. 'for'), this API 151193326Sed /// can be used to cause the lexer to map identifiers to source-language 152193326Sed /// tokens. 153193326Sed tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 154198092Srdivacky 155212904Sdim /// \brief True if RevertTokenIDToIdentifier() was called. 
156212904Sdim bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 157212904Sdim 158212904Sdim /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 159212904Sdim /// compatibility. 160212904Sdim /// 161212904Sdim /// TokenID is normally read-only but there are 2 instances where we revert it 162212904Sdim /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 163212904Sdim /// using this method so we can inform serialization about it. 164212904Sdim void RevertTokenIDToIdentifier() { 165212904Sdim assert(TokenID != tok::identifier && "Already at tok::identifier"); 166212904Sdim TokenID = tok::identifier; 167212904Sdim RevertedTokenID = true; 168212904Sdim } 169212904Sdim 170239462Sdim /// \brief Return the preprocessor keyword ID for this identifier. 171239462Sdim /// 172193326Sed /// For example, "define" will return tok::pp_define. 173193326Sed tok::PPKeywordKind getPPKeywordID() const; 174198092Srdivacky 175239462Sdim /// \brief Return the Objective-C keyword ID for the this identifier. 176239462Sdim /// 177239462Sdim /// For example, 'class' will return tok::objc_class if ObjC is enabled. 178193326Sed tok::ObjCKeywordKind getObjCKeywordID() const { 179198092Srdivacky if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 180193326Sed return tok::ObjCKeywordKind(ObjCOrBuiltinID); 181193326Sed else 182193326Sed return tok::objc_not_keyword; 183193326Sed } 184193326Sed void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 185193326Sed 186193326Sed /// getBuiltinID - Return a value indicating whether this is a builtin 187193326Sed /// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. 188193326Sed /// 2+ are specific builtin functions. 
189198092Srdivacky unsigned getBuiltinID() const { 190193326Sed if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 191198092Srdivacky return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 192193326Sed else 193193326Sed return 0; 194193326Sed } 195193326Sed void setBuiltinID(unsigned ID) { 196193326Sed ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 197198092Srdivacky assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 198193326Sed && "ID too large for field!"); 199193326Sed } 200193326Sed 201193326Sed unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 202193326Sed void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 203193326Sed 204193326Sed /// get/setExtension - Initialize information about whether or not this 205193326Sed /// language token is an extension. This controls extension warnings, and is 206193326Sed /// only valid if a custom token ID is set. 207193326Sed bool isExtensionToken() const { return IsExtension; } 208193326Sed void setIsExtensionToken(bool Val) { 209193326Sed IsExtension = Val; 210193326Sed if (Val) 211193326Sed NeedsHandleIdentifier = 1; 212193326Sed else 213193326Sed RecomputeNeedsHandleIdentifier(); 214193326Sed } 215198092Srdivacky 216226633Sdim /// is/setIsCXX11CompatKeyword - Initialize information about whether or not 217226633Sdim /// this language token is a keyword in C++11. This controls compatibility 218226633Sdim /// warnings, and is only true when not parsing C++11. Once a compatibility 219226633Sdim /// problem has been diagnosed with this keyword, the flag will be cleared. 220226633Sdim bool isCXX11CompatKeyword() const { return IsCXX11CompatKeyword; } 221226633Sdim void setIsCXX11CompatKeyword(bool Val) { 222226633Sdim IsCXX11CompatKeyword = Val; 223226633Sdim if (Val) 224226633Sdim NeedsHandleIdentifier = 1; 225226633Sdim else 226226633Sdim RecomputeNeedsHandleIdentifier(); 227226633Sdim } 228226633Sdim 229193326Sed /// setIsPoisoned - Mark this identifier as poisoned. 
After poisoning, the 230193326Sed /// Preprocessor will emit an error every time this token is used. 231193326Sed void setIsPoisoned(bool Value = true) { 232193326Sed IsPoisoned = Value; 233193326Sed if (Value) 234193326Sed NeedsHandleIdentifier = 1; 235193326Sed else 236193326Sed RecomputeNeedsHandleIdentifier(); 237193326Sed } 238198092Srdivacky 239193326Sed /// isPoisoned - Return true if this token has been poisoned. 240193326Sed bool isPoisoned() const { return IsPoisoned; } 241198092Srdivacky 242193326Sed /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 243193326Sed /// this identifier is a C++ alternate representation of an operator. 244193326Sed void setIsCPlusPlusOperatorKeyword(bool Val = true) { 245193326Sed IsCPPOperatorKeyword = Val; 246193326Sed if (Val) 247193326Sed NeedsHandleIdentifier = 1; 248193326Sed else 249193326Sed RecomputeNeedsHandleIdentifier(); 250193326Sed } 251193326Sed bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 252193326Sed 253193326Sed /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to 254193326Sed /// associate arbitrary metadata with this token. 255193326Sed template<typename T> 256193326Sed T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); } 257193326Sed void setFETokenInfo(void *T) { FETokenInfo = T; } 258193326Sed 259193326Sed /// isHandleIdentifierCase - Return true if the Preprocessor::HandleIdentifier 260193326Sed /// must be called on a token of this identifier. If this returns false, we 261193326Sed /// know that HandleIdentifier will not affect the token. 262193326Sed bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 263198092Srdivacky 264212904Sdim /// isFromAST - Return true if the identifier in its current state was loaded 265212904Sdim /// from an AST file. 
266212904Sdim bool isFromAST() const { return IsFromAST; } 267212904Sdim 268234353Sdim void setIsFromAST() { IsFromAST = true; } 269212904Sdim 270234353Sdim /// \brief Determine whether this identifier has changed since it was loaded 271234353Sdim /// from an AST file. 272234353Sdim bool hasChangedSinceDeserialization() const { 273234353Sdim return ChangedAfterLoad; 274234353Sdim } 275234353Sdim 276234353Sdim /// \brief Note that this identifier has changed since it was loaded from 277234353Sdim /// an AST file. 278234353Sdim void setChangedSinceDeserialization() { 279234353Sdim ChangedAfterLoad = true; 280234353Sdim } 281234353Sdim 282234353Sdim /// \brief Determine whether the information for this identifier is out of 283234353Sdim /// date with respect to the external source. 284234353Sdim bool isOutOfDate() const { return OutOfDate; } 285234353Sdim 286234353Sdim /// \brief Set whether the information for this identifier is out of 287234353Sdim /// date with respect to the external source. 288234353Sdim void setOutOfDate(bool OOD) { 289234353Sdim OutOfDate = OOD; 290234353Sdim if (OOD) 291234353Sdim NeedsHandleIdentifier = true; 292234353Sdim else 293234353Sdim RecomputeNeedsHandleIdentifier(); 294234353Sdim } 295234353Sdim 296234353Sdim /// \brief Determine whether this is the contextual keyword 297249423Sdim /// 'import'. 298234353Sdim bool isModulesImport() const { return IsModulesImport; } 299234353Sdim 300234353Sdim /// \brief Set whether this identifier is the contextual keyword 301249423Sdim /// 'import'. 302234353Sdim void setModulesImport(bool I) { 303234353Sdim IsModulesImport = I; 304234353Sdim if (I) 305234353Sdim NeedsHandleIdentifier = true; 306234353Sdim else 307234353Sdim RecomputeNeedsHandleIdentifier(); 308234353Sdim } 309234353Sdim 310193326Sedprivate: 311193326Sed /// RecomputeNeedsHandleIdentifier - The Preprocessor::HandleIdentifier does 312193326Sed /// several special (but rare) things to identifiers of various sorts. 
For 313193326Sed /// example, it changes the "for" keyword token from tok::identifier to 314193326Sed /// tok::for. 315193326Sed /// 316193326Sed /// This method is very tied to the definition of HandleIdentifier. Any 317193326Sed /// change to it should be reflected here. 318193326Sed void RecomputeNeedsHandleIdentifier() { 319193326Sed NeedsHandleIdentifier = 320193326Sed (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() | 321234353Sdim isExtensionToken() | isCXX11CompatKeyword() || isOutOfDate() || 322234353Sdim isModulesImport()); 323193326Sed } 324193326Sed}; 325193326Sed 326221345Sdim/// \brief an RAII object for [un]poisoning an identifier 327221345Sdim/// within a certain scope. II is allowed to be null, in 328221345Sdim/// which case, objects of this type have no effect. 329221345Sdimclass PoisonIdentifierRAIIObject { 330221345Sdim IdentifierInfo *const II; 331221345Sdim const bool OldValue; 332221345Sdimpublic: 333221345Sdim PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 334221345Sdim : II(II), OldValue(II ? II->isPoisoned() : false) { 335221345Sdim if(II) 336221345Sdim II->setIsPoisoned(NewValue); 337221345Sdim } 338221345Sdim 339221345Sdim ~PoisonIdentifierRAIIObject() { 340221345Sdim if(II) 341221345Sdim II->setIsPoisoned(OldValue); 342221345Sdim } 343221345Sdim}; 344221345Sdim 345218893Sdim/// \brief An iterator that walks over all of the known identifiers 346218893Sdim/// in the lookup table. 347218893Sdim/// 348218893Sdim/// Since this iterator uses an abstract interface via virtual 349218893Sdim/// functions, it uses an object-oriented interface rather than the 350218893Sdim/// more standard C++ STL iterator interface. In this OO-style 351218893Sdim/// iteration, the single function \c Next() provides dereference, 352218893Sdim/// advance, and end-of-sequence checking in a single 353218893Sdim/// operation. Subclasses of this iterator type will provide the 354218893Sdim/// actual functionality. 
355218893Sdimclass IdentifierIterator { 356218893Sdimprivate: 357243830Sdim IdentifierIterator(const IdentifierIterator &) LLVM_DELETED_FUNCTION; 358243830Sdim void operator=(const IdentifierIterator &) LLVM_DELETED_FUNCTION; 359218893Sdim 360218893Sdimprotected: 361218893Sdim IdentifierIterator() { } 362218893Sdim 363218893Sdimpublic: 364218893Sdim virtual ~IdentifierIterator(); 365218893Sdim 366218893Sdim /// \brief Retrieve the next string in the identifier table and 367218893Sdim /// advances the iterator for the following string. 368218893Sdim /// 369218893Sdim /// \returns The next string in the identifier table. If there is 370226633Sdim /// no such string, returns an empty \c StringRef. 371226633Sdim virtual StringRef Next() = 0; 372218893Sdim}; 373218893Sdim 374193326Sed/// IdentifierInfoLookup - An abstract class used by IdentifierTable that 375193326Sed/// provides an interface for performing lookups from strings 376193326Sed/// (const char *) to IdentiferInfo objects. 377193326Sedclass IdentifierInfoLookup { 378193326Sedpublic: 379193326Sed virtual ~IdentifierInfoLookup(); 380198092Srdivacky 381193326Sed /// get - Return the identifier token info for the specified named identifier. 382193326Sed /// Unlike the version in IdentifierTable, this returns a pointer instead 383193326Sed /// of a reference. If the pointer is NULL then the IdentifierInfo cannot 384193326Sed /// be found. 385226633Sdim virtual IdentifierInfo* get(StringRef Name) = 0; 386218893Sdim 387218893Sdim /// \brief Retrieve an iterator into the set of all identifiers 388218893Sdim /// known to this identifier lookup source. 389218893Sdim /// 390218893Sdim /// This routine provides access to all of the identifiers known to 391218893Sdim /// the identifier lookup, allowing access to the contents of the 392218893Sdim /// identifiers without introducing the overhead of constructing 393218893Sdim /// IdentifierInfo objects for each. 
394218893Sdim /// 395218893Sdim /// \returns A new iterator into the set of known identifiers. The 396218893Sdim /// caller is responsible for deleting this iterator. 397218893Sdim virtual IdentifierIterator *getIdentifiers() const; 398198092Srdivacky}; 399193326Sed 400193326Sed/// \brief An abstract class used to resolve numerical identifier 401193326Sed/// references (meaningful only to some external source) into 402193326Sed/// IdentifierInfo pointers. 403193326Sedclass ExternalIdentifierLookup { 404193326Sedpublic: 405193326Sed virtual ~ExternalIdentifierLookup(); 406193326Sed 407193326Sed /// \brief Return the identifier associated with the given ID number. 408193326Sed /// 409193326Sed /// The ID 0 is associated with the NULL identifier. 410193326Sed virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0; 411193326Sed}; 412193326Sed 413239462Sdim/// \brief Implements an efficient mapping from strings to IdentifierInfo nodes. 414239462Sdim/// 415239462Sdim/// This has no other purpose, but this is an extremely performance-critical 416239462Sdim/// piece of the code, as each occurrence of every identifier goes through 417239462Sdim/// here when lexed. 418193326Sedclass IdentifierTable { 419193326Sed // Shark shows that using MallocAllocator is *much* slower than using this 420193326Sed // BumpPtrAllocator! 421193326Sed typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy; 422193326Sed HashTableTy HashTable; 423198092Srdivacky 424193326Sed IdentifierInfoLookup* ExternalLookup; 425193326Sed 426193326Sedpublic: 427239462Sdim /// \brief Create the identifier table, populating it with info about the 428239462Sdim /// language keywords for the language specified by \p LangOpts. 429193326Sed IdentifierTable(const LangOptions &LangOpts, 430193326Sed IdentifierInfoLookup* externalLookup = 0); 431198092Srdivacky 432193326Sed /// \brief Set the external identifier lookup mechanism. 
433193326Sed void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 434193326Sed ExternalLookup = IILookup; 435193326Sed } 436193326Sed 437218893Sdim /// \brief Retrieve the external identifier lookup object, if any. 438218893Sdim IdentifierInfoLookup *getExternalIdentifierLookup() const { 439218893Sdim return ExternalLookup; 440218893Sdim } 441218893Sdim 442193326Sed llvm::BumpPtrAllocator& getAllocator() { 443193326Sed return HashTable.getAllocator(); 444193326Sed } 445198092Srdivacky 446239462Sdim /// \brief Return the identifier token info for the specified named 447239462Sdim /// identifier. 448226633Sdim IdentifierInfo &get(StringRef Name) { 449193326Sed llvm::StringMapEntry<IdentifierInfo*> &Entry = 450205219Srdivacky HashTable.GetOrCreateValue(Name); 451198092Srdivacky 452193326Sed IdentifierInfo *II = Entry.getValue(); 453193326Sed if (II) return *II; 454198092Srdivacky 455193326Sed // No entry; if we have an external lookup, look there first. 456193326Sed if (ExternalLookup) { 457205219Srdivacky II = ExternalLookup->get(Name); 458193326Sed if (II) { 459193326Sed // Cache in the StringMap for subsequent lookups. 460193326Sed Entry.setValue(II); 461193326Sed return *II; 462193326Sed } 463193326Sed } 464193326Sed 465193326Sed // Lookups failed, make a new IdentifierInfo. 466193326Sed void *Mem = getAllocator().Allocate<IdentifierInfo>(); 467193326Sed II = new (Mem) IdentifierInfo(); 468193326Sed Entry.setValue(II); 469193326Sed 470193326Sed // Make sure getName() knows how to find the IdentifierInfo 471193326Sed // contents. 
472193326Sed II->Entry = &Entry; 473193326Sed 474193326Sed return *II; 475193326Sed } 476198092Srdivacky 477226633Sdim IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 478212904Sdim IdentifierInfo &II = get(Name); 479212904Sdim II.TokenID = TokenCode; 480226633Sdim assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 481212904Sdim return II; 482212904Sdim } 483212904Sdim 484212904Sdim /// \brief Gets an IdentifierInfo for the given name without consulting 485212904Sdim /// external sources. 486193326Sed /// 487212904Sdim /// This is a version of get() meant for external sources that want to 488212904Sdim /// introduce or modify an identifier. If they called get(), they would 489212904Sdim /// likely end up in a recursion. 490226633Sdim IdentifierInfo &getOwn(StringRef Name) { 491193326Sed llvm::StringMapEntry<IdentifierInfo*> &Entry = 492224145Sdim HashTable.GetOrCreateValue(Name); 493198092Srdivacky 494193326Sed IdentifierInfo *II = Entry.getValue(); 495212904Sdim if (!II) { 496198092Srdivacky 497212904Sdim // Lookups failed, make a new IdentifierInfo. 498212904Sdim void *Mem = getAllocator().Allocate<IdentifierInfo>(); 499212904Sdim II = new (Mem) IdentifierInfo(); 500212904Sdim Entry.setValue(II); 501193326Sed 502212904Sdim // Make sure getName() knows how to find the IdentifierInfo 503212904Sdim // contents. 504212904Sdim II->Entry = &Entry; 505234353Sdim 506234353Sdim // If this is the 'import' contextual keyword, mark it as such. 
507234353Sdim if (Name.equals("import")) 508234353Sdim II->setModulesImport(true); 509212904Sdim } 510193326Sed 511193326Sed return *II; 512193326Sed } 513193326Sed 514193326Sed typedef HashTableTy::const_iterator iterator; 515193326Sed typedef HashTableTy::const_iterator const_iterator; 516198092Srdivacky 517193326Sed iterator begin() const { return HashTable.begin(); } 518193326Sed iterator end() const { return HashTable.end(); } 519193326Sed unsigned size() const { return HashTable.size(); } 520198092Srdivacky 521239462Sdim /// \brief Print some statistics to stderr that indicate how well the 522193326Sed /// hashing is doing. 523193326Sed void PrintStats() const; 524198092Srdivacky 525193326Sed void AddKeywords(const LangOptions &LangOpts); 526193326Sed}; 527193326Sed 528239462Sdim/// \brief A family of Objective-C methods. 529239462Sdim/// 530239462Sdim/// These families have no inherent meaning in the language, but are 531221345Sdim/// nonetheless central enough in the existing implementations to 532221345Sdim/// merit direct AST support. While, in theory, arbitrary methods can 533221345Sdim/// be considered to form families, we focus here on the methods 534221345Sdim/// involving allocation and retain-count management, as these are the 535221345Sdim/// most "core" and the most likely to be useful to diverse clients 536221345Sdim/// without extra information. 537221345Sdim/// 538221345Sdim/// Both selectors and actual method declarations may be classified 539221345Sdim/// into families. Method families may impose additional restrictions 540221345Sdim/// beyond their selector name; for example, a method called '_init' 541221345Sdim/// that returns void is not considered to be in the 'init' family 542221345Sdim/// (but would be if it returned 'id'). It is also possible to 543221345Sdim/// explicitly change or remove a method's family. Therefore the 544221345Sdim/// method's family should be considered the single source of truth. 
545221345Sdimenum ObjCMethodFamily { 546221345Sdim /// \brief No particular method family. 547221345Sdim OMF_None, 548221345Sdim 549221345Sdim // Selectors in these families may have arbitrary arity, may be 550221345Sdim // written with arbitrary leading underscores, and may have 551221345Sdim // additional CamelCase "words" in their first selector chunk 552221345Sdim // following the family name. 553221345Sdim OMF_alloc, 554221345Sdim OMF_copy, 555221345Sdim OMF_init, 556221345Sdim OMF_mutableCopy, 557221345Sdim OMF_new, 558221345Sdim 559221345Sdim // These families are singletons consisting only of the nullary 560221345Sdim // selector with the given name. 561221345Sdim OMF_autorelease, 562221345Sdim OMF_dealloc, 563226633Sdim OMF_finalize, 564221345Sdim OMF_release, 565221345Sdim OMF_retain, 566223017Sdim OMF_retainCount, 567224145Sdim OMF_self, 568224145Sdim 569224145Sdim // performSelector families 570224145Sdim OMF_performSelector 571221345Sdim}; 572221345Sdim 573221345Sdim/// Enough bits to store any enumerator in ObjCMethodFamily or 574221345Sdim/// InvalidObjCMethodFamily. 575221345Sdimenum { ObjCMethodFamilyBitWidth = 4 }; 576221345Sdim 577239462Sdim/// \brief An invalid value of ObjCMethodFamily. 578221345Sdimenum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 579221345Sdim 580239462Sdim/// \brief Smart pointer class that efficiently represents Objective-C method 581239462Sdim/// names. 582239462Sdim/// 583239462Sdim/// This class will either point to an IdentifierInfo or a 584193326Sed/// MultiKeywordSelector (which is private). This enables us to optimize 585198092Srdivacky/// selectors that take no arguments and selectors that take 1 argument, which 586193326Sed/// accounts for 78% of all selectors in Cocoa.h. 587193326Sedclass Selector { 588226633Sdim friend class Diagnostic; 589198092Srdivacky 590193326Sed enum IdentifierInfoFlag { 591239462Sdim // Empty selector = 0. 
592193326Sed ZeroArg = 0x1, 593193326Sed OneArg = 0x2, 594239462Sdim MultiArg = 0x3, 595193326Sed ArgFlags = ZeroArg|OneArg 596193326Sed }; 597193326Sed uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo. 598198092Srdivacky 599193326Sed Selector(IdentifierInfo *II, unsigned nArgs) { 600193326Sed InfoPtr = reinterpret_cast<uintptr_t>(II); 601193326Sed assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 602193326Sed assert(nArgs < 2 && "nArgs not equal to 0/1"); 603193326Sed InfoPtr |= nArgs+1; 604193326Sed } 605193326Sed Selector(MultiKeywordSelector *SI) { 606193326Sed InfoPtr = reinterpret_cast<uintptr_t>(SI); 607193326Sed assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 608239462Sdim InfoPtr |= MultiArg; 609193326Sed } 610198092Srdivacky 611193326Sed IdentifierInfo *getAsIdentifierInfo() const { 612239462Sdim if (getIdentifierInfoFlag() < MultiArg) 613193326Sed return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 614193326Sed return 0; 615193326Sed } 616239462Sdim MultiKeywordSelector *getMultiKeywordSelector() const { 617239462Sdim return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 618239462Sdim } 619239462Sdim 620193326Sed unsigned getIdentifierInfoFlag() const { 621193326Sed return InfoPtr & ArgFlags; 622193326Sed } 623193326Sed 624221345Sdim static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 625221345Sdim 626193326Sedpublic: 627193326Sed friend class SelectorTable; // only the SelectorTable can create these 628193326Sed friend class DeclarationName; // and the AST's DeclarationName. 629193326Sed 630193326Sed /// The default ctor should only be used when creating data structures that 631193326Sed /// will contain selectors. 632193326Sed Selector() : InfoPtr(0) {} 633193326Sed Selector(uintptr_t V) : InfoPtr(V) {} 634193326Sed 635193326Sed /// operator==/!= - Indicate whether the specified selectors are identical. 
636193326Sed bool operator==(Selector RHS) const { 637193326Sed return InfoPtr == RHS.InfoPtr; 638193326Sed } 639193326Sed bool operator!=(Selector RHS) const { 640193326Sed return InfoPtr != RHS.InfoPtr; 641193326Sed } 642193326Sed void *getAsOpaquePtr() const { 643193326Sed return reinterpret_cast<void*>(InfoPtr); 644193326Sed } 645193326Sed 646193326Sed /// \brief Determine whether this is the empty selector. 647193326Sed bool isNull() const { return InfoPtr == 0; } 648193326Sed 649193326Sed // Predicates to identify the selector type. 650198092Srdivacky bool isKeywordSelector() const { 651198092Srdivacky return getIdentifierInfoFlag() != ZeroArg; 652193326Sed } 653198092Srdivacky bool isUnarySelector() const { 654193326Sed return getIdentifierInfoFlag() == ZeroArg; 655193326Sed } 656193326Sed unsigned getNumArgs() const; 657218893Sdim 658218893Sdim 659218893Sdim /// \brief Retrieve the identifier at a given position in the selector. 660218893Sdim /// 661218893Sdim /// Note that the identifier pointer returned may be NULL. Clients that only 662218893Sdim /// care about the text of the identifier string, and not the specific, 663218893Sdim /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 664218893Sdim /// an empty string when the identifier pointer would be NULL. 665218893Sdim /// 666218893Sdim /// \param argIndex The index for which we want to retrieve the identifier. 667218893Sdim /// This index shall be less than \c getNumArgs() unless this is a keyword 668218893Sdim /// selector, in which case 0 is the only permissible value. 669218893Sdim /// 670218893Sdim /// \returns the uniqued identifier for this slot, or NULL if this slot has 671218893Sdim /// no corresponding identifier. 672193326Sed IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 673218893Sdim 674218893Sdim /// \brief Retrieve the name at a given position in the selector. 
675218893Sdim /// 676218893Sdim /// \param argIndex The index for which we want to retrieve the name. 677218893Sdim /// This index shall be less than \c getNumArgs() unless this is a keyword 678218893Sdim /// selector, in which case 0 is the only permissible value. 679218893Sdim /// 680218893Sdim /// \returns the name for this slot, which may be the empty string if no 681218893Sdim /// name was supplied. 682226633Sdim StringRef getNameForSlot(unsigned argIndex) const; 683218893Sdim 684239462Sdim /// \brief Derive the full selector name (e.g. "foo:bar:") and return 685193326Sed /// it as an std::string. 686239462Sdim // FIXME: Add a print method that uses a raw_ostream. 687193326Sed std::string getAsString() const; 688198092Srdivacky 689239462Sdim /// \brief Derive the conventional family of this method. 690221345Sdim ObjCMethodFamily getMethodFamily() const { 691221345Sdim return getMethodFamilyImpl(*this); 692221345Sdim } 693221345Sdim 694193326Sed static Selector getEmptyMarker() { 695193326Sed return Selector(uintptr_t(-1)); 696193326Sed } 697193326Sed static Selector getTombstoneMarker() { 698193326Sed return Selector(uintptr_t(-2)); 699193326Sed } 700193326Sed}; 701193326Sed 702239462Sdim/// \brief This table allows us to fully hide how we implement 703193326Sed/// multi-keyword caching. 704193326Sedclass SelectorTable { 705193326Sed void *Impl; // Actually a SelectorTableImpl 706243830Sdim SelectorTable(const SelectorTable &) LLVM_DELETED_FUNCTION; 707243830Sdim void operator=(const SelectorTable &) LLVM_DELETED_FUNCTION; 708193326Sedpublic: 709193326Sed SelectorTable(); 710193326Sed ~SelectorTable(); 711193326Sed 712239462Sdim /// \brief Can create any sort of selector. 713239462Sdim /// 714239462Sdim /// \p NumArgs indicates whether this is a no argument selector "foo", a 715239462Sdim /// single argument selector "foo:" or multi-argument "foo:bar:". 
716193326Sed Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 717198092Srdivacky 718193326Sed Selector getUnarySelector(IdentifierInfo *ID) { 719193326Sed return Selector(ID, 1); 720193326Sed } 721193326Sed Selector getNullarySelector(IdentifierInfo *ID) { 722193326Sed return Selector(ID, 0); 723193326Sed } 724193326Sed 725239462Sdim /// \brief Return the total amount of memory allocated for managing selectors. 726221345Sdim size_t getTotalMemory() const; 727221345Sdim 728239462Sdim /// \brief Return the setter name for the given identifier. 729239462Sdim /// 730239462Sdim /// This is "set" + \p Name where the initial character of \p Name 731193326Sed /// has been capitalized. 732193326Sed static Selector constructSetterName(IdentifierTable &Idents, 733193326Sed SelectorTable &SelTable, 734234353Sdim const IdentifierInfo *Name); 735193326Sed}; 736193326Sed 737193326Sed/// DeclarationNameExtra - Common base of the MultiKeywordSelector, 738193326Sed/// CXXSpecialName, and CXXOperatorIdName classes, all of which are 739193326Sed/// private classes that describe different kinds of names. 740193326Sedclass DeclarationNameExtra { 741193326Sedpublic: 742193326Sed /// ExtraKind - The kind of "extra" information stored in the 743193326Sed /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of 744193326Sed /// how these enumerator values are used. 
745193326Sed enum ExtraKind { 746193326Sed CXXConstructor = 0, 747193326Sed CXXDestructor, 748193326Sed CXXConversionFunction, 749193326Sed#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 750193326Sed CXXOperator##Name, 751193326Sed#include "clang/Basic/OperatorKinds.def" 752199990Srdivacky CXXLiteralOperator, 753193326Sed CXXUsingDirective, 754193326Sed NUM_EXTRA_KINDS 755193326Sed }; 756193326Sed 757193326Sed /// ExtraKindOrNumArgs - Either the kind of C++ special name or 758193326Sed /// operator-id (if the value is one of the CXX* enumerators of 759193326Sed /// ExtraKind), in which case the DeclarationNameExtra is also a 760199990Srdivacky /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or 761199990Srdivacky /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName, 762199990Srdivacky /// it may be also name common to C++ using-directives (CXXUsingDirective), 763199990Srdivacky /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of 764193326Sed /// arguments in the Objective-C selector, in which case the 765193326Sed /// DeclarationNameExtra is also a MultiKeywordSelector. 766193326Sed unsigned ExtraKindOrNumArgs; 767193326Sed}; 768193326Sed 769193326Sed} // end namespace clang 770193326Sed 771193326Sednamespace llvm { 772193326Sed/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 773193326Sed/// DenseSets. 
774193326Sedtemplate <> 775193326Sedstruct DenseMapInfo<clang::Selector> { 776193326Sed static inline clang::Selector getEmptyKey() { 777193326Sed return clang::Selector::getEmptyMarker(); 778193326Sed } 779193326Sed static inline clang::Selector getTombstoneKey() { 780198092Srdivacky return clang::Selector::getTombstoneMarker(); 781193326Sed } 782198092Srdivacky 783193326Sed static unsigned getHashValue(clang::Selector S); 784198092Srdivacky 785193326Sed static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 786193326Sed return LHS == RHS; 787193326Sed } 788193326Sed}; 789205219Srdivacky 790200583Srdivackytemplate <> 791200583Srdivackystruct isPodLike<clang::Selector> { static const bool value = true; }; 792193326Sed 793218893Sdimtemplate<> 794218893Sdimclass PointerLikeTypeTraits<clang::Selector> { 795218893Sdimpublic: 796218893Sdim static inline const void *getAsVoidPointer(clang::Selector P) { 797218893Sdim return P.getAsOpaquePtr(); 798218893Sdim } 799218893Sdim static inline clang::Selector getFromVoidPointer(const void *P) { 800218893Sdim return clang::Selector(reinterpret_cast<uintptr_t>(P)); 801218893Sdim } 802218893Sdim enum { NumLowBitsAvailable = 0 }; 803218893Sdim}; 804200583Srdivacky 805193326Sed// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 806193326Sed// are not guaranteed to be 8-byte aligned. 
807193326Sedtemplate<> 808193326Sedclass PointerLikeTypeTraits<clang::IdentifierInfo*> { 809193326Sedpublic: 810193326Sed static inline void *getAsVoidPointer(clang::IdentifierInfo* P) { 811198092Srdivacky return P; 812193326Sed } 813193326Sed static inline clang::IdentifierInfo *getFromVoidPointer(void *P) { 814193326Sed return static_cast<clang::IdentifierInfo*>(P); 815193326Sed } 816193326Sed enum { NumLowBitsAvailable = 1 }; 817193326Sed}; 818193326Sed 819193326Sedtemplate<> 820193326Sedclass PointerLikeTypeTraits<const clang::IdentifierInfo*> { 821193326Sedpublic: 822193326Sed static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 823198092Srdivacky return P; 824193326Sed } 825193326Sed static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 826193326Sed return static_cast<const clang::IdentifierInfo*>(P); 827193326Sed } 828193326Sed enum { NumLowBitsAvailable = 1 }; 829193326Sed}; 830193326Sed 831193326Sed} // end namespace llvm 832193326Sed#endif 833