1193326Sed//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===// 2193326Sed// 3193326Sed// The LLVM Compiler Infrastructure 4193326Sed// 5193326Sed// This file is distributed under the University of Illinois Open Source 6193326Sed// License. See LICENSE.TXT for details. 7193326Sed// 8193326Sed//===----------------------------------------------------------------------===// 9239462Sdim/// 10239462Sdim/// \file 11239462Sdim/// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and 12239462Sdim/// clang::Selector interfaces. 13239462Sdim/// 14193326Sed//===----------------------------------------------------------------------===// 15193326Sed 16193326Sed#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17193326Sed#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 18193326Sed 19249423Sdim#include "clang/Basic/LLVM.h" 20193326Sed#include "clang/Basic/OperatorKinds.h" 21193326Sed#include "clang/Basic/TokenKinds.h" 22263508Sdim#include "llvm/ADT/SmallString.h" 23193326Sed#include "llvm/ADT/StringMap.h" 24205219Srdivacky#include "llvm/ADT/StringRef.h" 25193326Sed#include "llvm/Support/PointerLikeTypeTraits.h" 26218893Sdim#include <cassert> 27198092Srdivacky#include <string> 28193326Sed 29193326Sednamespace llvm { 30193326Sed template <typename T> struct DenseMapInfo; 31193326Sed} 32193326Sed 33193326Sednamespace clang { 34193326Sed class LangOptions; 35193326Sed class IdentifierInfo; 36193326Sed class IdentifierTable; 37193326Sed class SourceLocation; 38193326Sed class MultiKeywordSelector; // private class used by Selector 39193326Sed class DeclarationName; // AST class that stores declaration names 40193326Sed 41239462Sdim /// \brief A simple pair of identifier info and location. 42193326Sed typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair; 43198092Srdivacky 44198092Srdivacky 45239462Sdim/// One of these records is kept for each identifier that 46239462Sdim/// is lexed. This contains information about whether the token was \#define'd, 47193326Sed/// is a language keyword, or if it is a front-end token of some sort (e.g. a 48193326Sed/// variable or function name). The preprocessor keeps this information in a 49198092Srdivacky/// set, and all tok::identifier tokens have a pointer to one of these. 50193326Sedclass IdentifierInfo { 51226633Sdim unsigned TokenID : 9; // Front-end token ID or tok::identifier. 52193326Sed // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 53193326Sed // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values 54193326Sed // are for builtins. 55218893Sdim unsigned ObjCOrBuiltinID :11; 56193326Sed bool HasMacro : 1; // True if there is a #define for this. 57243830Sdim bool HadMacro : 1; // True if there was a #define for this. 58193326Sed bool IsExtension : 1; // True if identifier is a lang extension. 59226633Sdim bool IsCXX11CompatKeyword : 1; // True if identifier is a keyword in C++11. 60193326Sed bool IsPoisoned : 1; // True if identifier is poisoned. 61193326Sed bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword. 62193326Sed bool NeedsHandleIdentifier : 1; // See "RecomputeNeedsHandleIdentifier". 63234353Sdim bool IsFromAST : 1; // True if identifier was loaded (at least 64234353Sdim // partially) from an AST file. 65234353Sdim bool ChangedAfterLoad : 1; // True if identifier has changed from the 66234353Sdim // definition loaded from an AST file. 67212904Sdim bool RevertedTokenID : 1; // True if RevertTokenIDToIdentifier was 68212904Sdim // called. 69234353Sdim bool OutOfDate : 1; // True if there may be additional 70234353Sdim // information about this identifier 71234353Sdim // stored externally. 72239462Sdim bool IsModulesImport : 1; // True if this is the 'import' contextual 73234353Sdim // keyword. 74243830Sdim // 32-bit word is filled. 75243830Sdim 76193326Sed void *FETokenInfo; // Managed by the language front-end. 77193326Sed llvm::StringMapEntry<IdentifierInfo*> *Entry; 78198092Srdivacky 79243830Sdim IdentifierInfo(const IdentifierInfo&) LLVM_DELETED_FUNCTION; 80243830Sdim void operator=(const IdentifierInfo&) LLVM_DELETED_FUNCTION; 81193326Sed 82198092Srdivacky friend class IdentifierTable; 83218893Sdim 84193326Sedpublic: 85193326Sed IdentifierInfo(); 86193326Sed 87198092Srdivacky 88239462Sdim /// \brief Return true if this is the identifier for the specified string. 89239462Sdim /// 90193326Sed /// This is intended to be used for string literals only: II->isStr("foo"). 91193326Sed template <std::size_t StrLen> 92193326Sed bool isStr(const char (&Str)[StrLen]) const { 93198398Srdivacky return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1); 94193326Sed } 95198092Srdivacky 96239462Sdim /// \brief Return the beginning of the actual null-terminated string for this 97239462Sdim /// identifier. 98193326Sed /// 99198398Srdivacky const char *getNameStart() const { 100193326Sed if (Entry) return Entry->getKeyData(); 101193326Sed // FIXME: This is gross. It would be best not to embed specific details 102193326Sed // of the PTH file format here. 103198092Srdivacky // The 'this' pointer really points to a 104193326Sed // std::pair<IdentifierInfo, const char*>, where internal pointer 105193326Sed // points to the external string data. 106210299Sed typedef std::pair<IdentifierInfo, const char*> actualtype; 107210299Sed return ((const actualtype*) this)->second; 108193326Sed } 109198092Srdivacky 110239462Sdim /// \brief Efficiently return the length of this identifier info. 111193326Sed /// 112193326Sed unsigned getLength() const { 113193326Sed if (Entry) return Entry->getKeyLength(); 114193326Sed // FIXME: This is gross. It would be best not to embed specific details 115193326Sed // of the PTH file format here. 116198092Srdivacky // The 'this' pointer really points to a 117193326Sed // std::pair<IdentifierInfo, const char*>, where internal pointer 118193326Sed // points to the external string data. 119210299Sed typedef std::pair<IdentifierInfo, const char*> actualtype; 120210299Sed const char* p = ((const actualtype*) this)->second - 2; 121198398Srdivacky return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1; 122193326Sed } 123198092Srdivacky 124239462Sdim /// \brief Return the actual identifier string. 125226633Sdim StringRef getName() const { 126226633Sdim return StringRef(getNameStart(), getLength()); 127198398Srdivacky } 128198398Srdivacky 129239462Sdim /// \brief Return true if this identifier is \#defined to some other value. 130193326Sed bool hasMacroDefinition() const { 131193326Sed return HasMacro; 132193326Sed } 133193326Sed void setHasMacroDefinition(bool Val) { 134193326Sed if (HasMacro == Val) return; 135198092Srdivacky 136193326Sed HasMacro = Val; 137243830Sdim if (Val) { 138193326Sed NeedsHandleIdentifier = 1; 139243830Sdim HadMacro = true; 140243830Sdim } else { 141193326Sed RecomputeNeedsHandleIdentifier(); 142243830Sdim } 143193326Sed } 144243830Sdim /// \brief Returns true if this identifier was \#defined to some value at any 145243830Sdim /// moment. In this case there should be an entry for the identifier in the 146243830Sdim /// macro history table in Preprocessor. 147243830Sdim bool hadMacroDefinition() const { 148243830Sdim return HadMacro; 149243830Sdim } 150198092Srdivacky 151263508Sdim /// If this is a source-language token (e.g. 'for'), this API 152193326Sed /// can be used to cause the lexer to map identifiers to source-language 153193326Sed /// tokens. 154193326Sed tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 155198092Srdivacky 156212904Sdim /// \brief True if RevertTokenIDToIdentifier() was called. 157212904Sdim bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 158212904Sdim 159212904Sdim /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 160212904Sdim /// compatibility. 161212904Sdim /// 162212904Sdim /// TokenID is normally read-only but there are 2 instances where we revert it 163212904Sdim /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 164212904Sdim /// using this method so we can inform serialization about it. 165212904Sdim void RevertTokenIDToIdentifier() { 166212904Sdim assert(TokenID != tok::identifier && "Already at tok::identifier"); 167212904Sdim TokenID = tok::identifier; 168212904Sdim RevertedTokenID = true; 169212904Sdim } 170212904Sdim 171239462Sdim /// \brief Return the preprocessor keyword ID for this identifier. 172239462Sdim /// 173193326Sed /// For example, "define" will return tok::pp_define. 174193326Sed tok::PPKeywordKind getPPKeywordID() const; 175198092Srdivacky 176239462Sdim /// \brief Return the Objective-C keyword ID for the this identifier. 177239462Sdim /// 178239462Sdim /// For example, 'class' will return tok::objc_class if ObjC is enabled. 179193326Sed tok::ObjCKeywordKind getObjCKeywordID() const { 180198092Srdivacky if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 181193326Sed return tok::ObjCKeywordKind(ObjCOrBuiltinID); 182193326Sed else 183193326Sed return tok::objc_not_keyword; 184193326Sed } 185193326Sed void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 186193326Sed 187263508Sdim /// \brief Return a value indicating whether this is a builtin function. 188263508Sdim /// 189263508Sdim /// 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. 190193326Sed /// 2+ are specific builtin functions. 191198092Srdivacky unsigned getBuiltinID() const { 192193326Sed if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 193198092Srdivacky return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 194193326Sed else 195193326Sed return 0; 196193326Sed } 197193326Sed void setBuiltinID(unsigned ID) { 198193326Sed ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 199198092Srdivacky assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 200193326Sed && "ID too large for field!"); 201193326Sed } 202193326Sed 203193326Sed unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } 204193326Sed void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 205193326Sed 206193326Sed /// get/setExtension - Initialize information about whether or not this 207193326Sed /// language token is an extension. This controls extension warnings, and is 208193326Sed /// only valid if a custom token ID is set. 209193326Sed bool isExtensionToken() const { return IsExtension; } 210193326Sed void setIsExtensionToken(bool Val) { 211193326Sed IsExtension = Val; 212193326Sed if (Val) 213193326Sed NeedsHandleIdentifier = 1; 214193326Sed else 215193326Sed RecomputeNeedsHandleIdentifier(); 216193326Sed } 217198092Srdivacky 218226633Sdim /// is/setIsCXX11CompatKeyword - Initialize information about whether or not 219226633Sdim /// this language token is a keyword in C++11. This controls compatibility 220226633Sdim /// warnings, and is only true when not parsing C++11. Once a compatibility 221226633Sdim /// problem has been diagnosed with this keyword, the flag will be cleared. 222226633Sdim bool isCXX11CompatKeyword() const { return IsCXX11CompatKeyword; } 223226633Sdim void setIsCXX11CompatKeyword(bool Val) { 224226633Sdim IsCXX11CompatKeyword = Val; 225226633Sdim if (Val) 226226633Sdim NeedsHandleIdentifier = 1; 227226633Sdim else 228226633Sdim RecomputeNeedsHandleIdentifier(); 229226633Sdim } 230226633Sdim 231193326Sed /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 232193326Sed /// Preprocessor will emit an error every time this token is used. 233193326Sed void setIsPoisoned(bool Value = true) { 234193326Sed IsPoisoned = Value; 235193326Sed if (Value) 236193326Sed NeedsHandleIdentifier = 1; 237193326Sed else 238193326Sed RecomputeNeedsHandleIdentifier(); 239193326Sed } 240198092Srdivacky 241263508Sdim /// \brief Return true if this token has been poisoned. 242193326Sed bool isPoisoned() const { return IsPoisoned; } 243198092Srdivacky 244193326Sed /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 245193326Sed /// this identifier is a C++ alternate representation of an operator. 246193326Sed void setIsCPlusPlusOperatorKeyword(bool Val = true) { 247193326Sed IsCPPOperatorKeyword = Val; 248193326Sed if (Val) 249193326Sed NeedsHandleIdentifier = 1; 250193326Sed else 251193326Sed RecomputeNeedsHandleIdentifier(); 252193326Sed } 253193326Sed bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 254193326Sed 255193326Sed /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to 256193326Sed /// associate arbitrary metadata with this token. 257193326Sed template<typename T> 258193326Sed T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); } 259193326Sed void setFETokenInfo(void *T) { FETokenInfo = T; } 260193326Sed 261263508Sdim /// \brief Return true if the Preprocessor::HandleIdentifier must be called 262263508Sdim /// on a token of this identifier. 263263508Sdim /// 264263508Sdim /// If this returns false, we know that HandleIdentifier will not affect 265263508Sdim /// the token. 266193326Sed bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 267198092Srdivacky 268263508Sdim /// \brief Return true if the identifier in its current state was loaded 269212904Sdim /// from an AST file. 270212904Sdim bool isFromAST() const { return IsFromAST; } 271212904Sdim 272234353Sdim void setIsFromAST() { IsFromAST = true; } 273212904Sdim 274234353Sdim /// \brief Determine whether this identifier has changed since it was loaded 275234353Sdim /// from an AST file. 276234353Sdim bool hasChangedSinceDeserialization() const { 277234353Sdim return ChangedAfterLoad; 278234353Sdim } 279234353Sdim 280234353Sdim /// \brief Note that this identifier has changed since it was loaded from 281234353Sdim /// an AST file. 282234353Sdim void setChangedSinceDeserialization() { 283234353Sdim ChangedAfterLoad = true; 284234353Sdim } 285234353Sdim 286234353Sdim /// \brief Determine whether the information for this identifier is out of 287234353Sdim /// date with respect to the external source. 288234353Sdim bool isOutOfDate() const { return OutOfDate; } 289234353Sdim 290234353Sdim /// \brief Set whether the information for this identifier is out of 291234353Sdim /// date with respect to the external source. 292234353Sdim void setOutOfDate(bool OOD) { 293234353Sdim OutOfDate = OOD; 294234353Sdim if (OOD) 295234353Sdim NeedsHandleIdentifier = true; 296234353Sdim else 297234353Sdim RecomputeNeedsHandleIdentifier(); 298234353Sdim } 299234353Sdim 300263508Sdim /// \brief Determine whether this is the contextual keyword \c import. 301234353Sdim bool isModulesImport() const { return IsModulesImport; } 302234353Sdim 303263508Sdim /// \brief Set whether this identifier is the contextual keyword \c import. 304234353Sdim void setModulesImport(bool I) { 305234353Sdim IsModulesImport = I; 306234353Sdim if (I) 307234353Sdim NeedsHandleIdentifier = true; 308234353Sdim else 309234353Sdim RecomputeNeedsHandleIdentifier(); 310234353Sdim } 311234353Sdim 312193326Sedprivate: 313263508Sdim /// The Preprocessor::HandleIdentifier does several special (but rare) 314263508Sdim /// things to identifiers of various sorts. For example, it changes the 315263508Sdim /// \c for keyword token from tok::identifier to tok::for. 316193326Sed /// 317193326Sed /// This method is very tied to the definition of HandleIdentifier. Any 318193326Sed /// change to it should be reflected here. 319193326Sed void RecomputeNeedsHandleIdentifier() { 320193326Sed NeedsHandleIdentifier = 321193326Sed (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() | 322234353Sdim isExtensionToken() | isCXX11CompatKeyword() || isOutOfDate() || 323234353Sdim isModulesImport()); 324193326Sed } 325193326Sed}; 326193326Sed 327263508Sdim/// \brief An RAII object for [un]poisoning an identifier within a scope. 328263508Sdim/// 329263508Sdim/// \p II is allowed to be null, in which case objects of this type have 330263508Sdim/// no effect. 331221345Sdimclass PoisonIdentifierRAIIObject { 332221345Sdim IdentifierInfo *const II; 333221345Sdim const bool OldValue; 334221345Sdimpublic: 335221345Sdim PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 336221345Sdim : II(II), OldValue(II ? II->isPoisoned() : false) { 337221345Sdim if(II) 338221345Sdim II->setIsPoisoned(NewValue); 339221345Sdim } 340221345Sdim 341221345Sdim ~PoisonIdentifierRAIIObject() { 342221345Sdim if(II) 343221345Sdim II->setIsPoisoned(OldValue); 344221345Sdim } 345221345Sdim}; 346221345Sdim 347218893Sdim/// \brief An iterator that walks over all of the known identifiers 348218893Sdim/// in the lookup table. 349218893Sdim/// 350218893Sdim/// Since this iterator uses an abstract interface via virtual 351218893Sdim/// functions, it uses an object-oriented interface rather than the 352218893Sdim/// more standard C++ STL iterator interface. In this OO-style 353218893Sdim/// iteration, the single function \c Next() provides dereference, 354218893Sdim/// advance, and end-of-sequence checking in a single 355218893Sdim/// operation. Subclasses of this iterator type will provide the 356218893Sdim/// actual functionality. 357218893Sdimclass IdentifierIterator { 358218893Sdimprivate: 359243830Sdim IdentifierIterator(const IdentifierIterator &) LLVM_DELETED_FUNCTION; 360243830Sdim void operator=(const IdentifierIterator &) LLVM_DELETED_FUNCTION; 361218893Sdim 362218893Sdimprotected: 363218893Sdim IdentifierIterator() { } 364218893Sdim 365218893Sdimpublic: 366218893Sdim virtual ~IdentifierIterator(); 367218893Sdim 368218893Sdim /// \brief Retrieve the next string in the identifier table and 369218893Sdim /// advances the iterator for the following string. 370218893Sdim /// 371218893Sdim /// \returns The next string in the identifier table. If there is 372226633Sdim /// no such string, returns an empty \c StringRef. 373226633Sdim virtual StringRef Next() = 0; 374218893Sdim}; 375218893Sdim 376263508Sdim/// \brief Provides lookups to, and iteration over, IdentiferInfo objects. 377193326Sedclass IdentifierInfoLookup { 378193326Sedpublic: 379193326Sed virtual ~IdentifierInfoLookup(); 380198092Srdivacky 381263508Sdim /// \brief Return the IdentifierInfo for the specified named identifier. 382263508Sdim /// 383263508Sdim /// Unlike the version in IdentifierTable, this returns a pointer instead 384263508Sdim /// of a reference. If the pointer is null then the IdentifierInfo cannot 385263508Sdim /// be found. 386226633Sdim virtual IdentifierInfo* get(StringRef Name) = 0; 387218893Sdim 388218893Sdim /// \brief Retrieve an iterator into the set of all identifiers 389218893Sdim /// known to this identifier lookup source. 390218893Sdim /// 391218893Sdim /// This routine provides access to all of the identifiers known to 392218893Sdim /// the identifier lookup, allowing access to the contents of the 393218893Sdim /// identifiers without introducing the overhead of constructing 394218893Sdim /// IdentifierInfo objects for each. 395218893Sdim /// 396218893Sdim /// \returns A new iterator into the set of known identifiers. The 397218893Sdim /// caller is responsible for deleting this iterator. 398251662Sdim virtual IdentifierIterator *getIdentifiers(); 399198092Srdivacky}; 400193326Sed 401193326Sed/// \brief An abstract class used to resolve numerical identifier 402193326Sed/// references (meaningful only to some external source) into 403193326Sed/// IdentifierInfo pointers. 404193326Sedclass ExternalIdentifierLookup { 405193326Sedpublic: 406193326Sed virtual ~ExternalIdentifierLookup(); 407193326Sed 408193326Sed /// \brief Return the identifier associated with the given ID number. 409193326Sed /// 410193326Sed /// The ID 0 is associated with the NULL identifier. 411193326Sed virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0; 412193326Sed}; 413193326Sed 414239462Sdim/// \brief Implements an efficient mapping from strings to IdentifierInfo nodes. 415239462Sdim/// 416239462Sdim/// This has no other purpose, but this is an extremely performance-critical 417239462Sdim/// piece of the code, as each occurrence of every identifier goes through 418239462Sdim/// here when lexed. 419193326Sedclass IdentifierTable { 420193326Sed // Shark shows that using MallocAllocator is *much* slower than using this 421193326Sed // BumpPtrAllocator! 422193326Sed typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy; 423193326Sed HashTableTy HashTable; 424198092Srdivacky 425193326Sed IdentifierInfoLookup* ExternalLookup; 426193326Sed 427193326Sedpublic: 428239462Sdim /// \brief Create the identifier table, populating it with info about the 429239462Sdim /// language keywords for the language specified by \p LangOpts. 430193326Sed IdentifierTable(const LangOptions &LangOpts, 431193326Sed IdentifierInfoLookup* externalLookup = 0); 432198092Srdivacky 433193326Sed /// \brief Set the external identifier lookup mechanism. 434193326Sed void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 435193326Sed ExternalLookup = IILookup; 436193326Sed } 437193326Sed 438218893Sdim /// \brief Retrieve the external identifier lookup object, if any. 439218893Sdim IdentifierInfoLookup *getExternalIdentifierLookup() const { 440218893Sdim return ExternalLookup; 441218893Sdim } 442218893Sdim 443193326Sed llvm::BumpPtrAllocator& getAllocator() { 444193326Sed return HashTable.getAllocator(); 445193326Sed } 446198092Srdivacky 447239462Sdim /// \brief Return the identifier token info for the specified named 448239462Sdim /// identifier. 449226633Sdim IdentifierInfo &get(StringRef Name) { 450193326Sed llvm::StringMapEntry<IdentifierInfo*> &Entry = 451205219Srdivacky HashTable.GetOrCreateValue(Name); 452198092Srdivacky 453193326Sed IdentifierInfo *II = Entry.getValue(); 454193326Sed if (II) return *II; 455198092Srdivacky 456193326Sed // No entry; if we have an external lookup, look there first. 457193326Sed if (ExternalLookup) { 458205219Srdivacky II = ExternalLookup->get(Name); 459193326Sed if (II) { 460193326Sed // Cache in the StringMap for subsequent lookups. 461193326Sed Entry.setValue(II); 462193326Sed return *II; 463193326Sed } 464193326Sed } 465193326Sed 466193326Sed // Lookups failed, make a new IdentifierInfo. 467193326Sed void *Mem = getAllocator().Allocate<IdentifierInfo>(); 468193326Sed II = new (Mem) IdentifierInfo(); 469193326Sed Entry.setValue(II); 470193326Sed 471193326Sed // Make sure getName() knows how to find the IdentifierInfo 472193326Sed // contents. 473193326Sed II->Entry = &Entry; 474193326Sed 475193326Sed return *II; 476193326Sed } 477198092Srdivacky 478226633Sdim IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 479212904Sdim IdentifierInfo &II = get(Name); 480212904Sdim II.TokenID = TokenCode; 481226633Sdim assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 482212904Sdim return II; 483212904Sdim } 484212904Sdim 485212904Sdim /// \brief Gets an IdentifierInfo for the given name without consulting 486212904Sdim /// external sources. 487193326Sed /// 488212904Sdim /// This is a version of get() meant for external sources that want to 489212904Sdim /// introduce or modify an identifier. If they called get(), they would 490212904Sdim /// likely end up in a recursion. 491226633Sdim IdentifierInfo &getOwn(StringRef Name) { 492193326Sed llvm::StringMapEntry<IdentifierInfo*> &Entry = 493224145Sdim HashTable.GetOrCreateValue(Name); 494198092Srdivacky 495193326Sed IdentifierInfo *II = Entry.getValue(); 496212904Sdim if (!II) { 497198092Srdivacky 498212904Sdim // Lookups failed, make a new IdentifierInfo. 499212904Sdim void *Mem = getAllocator().Allocate<IdentifierInfo>(); 500212904Sdim II = new (Mem) IdentifierInfo(); 501212904Sdim Entry.setValue(II); 502193326Sed 503212904Sdim // Make sure getName() knows how to find the IdentifierInfo 504212904Sdim // contents. 505212904Sdim II->Entry = &Entry; 506234353Sdim 507234353Sdim // If this is the 'import' contextual keyword, mark it as such. 508234353Sdim if (Name.equals("import")) 509234353Sdim II->setModulesImport(true); 510212904Sdim } 511193326Sed 512193326Sed return *II; 513193326Sed } 514193326Sed 515193326Sed typedef HashTableTy::const_iterator iterator; 516193326Sed typedef HashTableTy::const_iterator const_iterator; 517198092Srdivacky 518193326Sed iterator begin() const { return HashTable.begin(); } 519193326Sed iterator end() const { return HashTable.end(); } 520193326Sed unsigned size() const { return HashTable.size(); } 521198092Srdivacky 522239462Sdim /// \brief Print some statistics to stderr that indicate how well the 523193326Sed /// hashing is doing. 524193326Sed void PrintStats() const; 525198092Srdivacky 526193326Sed void AddKeywords(const LangOptions &LangOpts); 527193326Sed}; 528193326Sed 529239462Sdim/// \brief A family of Objective-C methods. 530239462Sdim/// 531239462Sdim/// These families have no inherent meaning in the language, but are 532221345Sdim/// nonetheless central enough in the existing implementations to 533221345Sdim/// merit direct AST support. While, in theory, arbitrary methods can 534221345Sdim/// be considered to form families, we focus here on the methods 535221345Sdim/// involving allocation and retain-count management, as these are the 536221345Sdim/// most "core" and the most likely to be useful to diverse clients 537221345Sdim/// without extra information. 538221345Sdim/// 539221345Sdim/// Both selectors and actual method declarations may be classified 540221345Sdim/// into families. Method families may impose additional restrictions 541221345Sdim/// beyond their selector name; for example, a method called '_init' 542221345Sdim/// that returns void is not considered to be in the 'init' family 543221345Sdim/// (but would be if it returned 'id'). It is also possible to 544221345Sdim/// explicitly change or remove a method's family. Therefore the 545221345Sdim/// method's family should be considered the single source of truth. 546221345Sdimenum ObjCMethodFamily { 547221345Sdim /// \brief No particular method family. 548221345Sdim OMF_None, 549221345Sdim 550221345Sdim // Selectors in these families may have arbitrary arity, may be 551221345Sdim // written with arbitrary leading underscores, and may have 552221345Sdim // additional CamelCase "words" in their first selector chunk 553221345Sdim // following the family name. 554221345Sdim OMF_alloc, 555221345Sdim OMF_copy, 556221345Sdim OMF_init, 557221345Sdim OMF_mutableCopy, 558221345Sdim OMF_new, 559221345Sdim 560221345Sdim // These families are singletons consisting only of the nullary 561221345Sdim // selector with the given name. 562221345Sdim OMF_autorelease, 563221345Sdim OMF_dealloc, 564226633Sdim OMF_finalize, 565221345Sdim OMF_release, 566221345Sdim OMF_retain, 567223017Sdim OMF_retainCount, 568224145Sdim OMF_self, 569224145Sdim 570224145Sdim // performSelector families 571224145Sdim OMF_performSelector 572221345Sdim}; 573221345Sdim 574221345Sdim/// Enough bits to store any enumerator in ObjCMethodFamily or 575221345Sdim/// InvalidObjCMethodFamily. 576221345Sdimenum { ObjCMethodFamilyBitWidth = 4 }; 577221345Sdim 578239462Sdim/// \brief An invalid value of ObjCMethodFamily. 579221345Sdimenum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 580221345Sdim 581263508Sdim/// \brief A family of Objective-C methods. 582263508Sdim/// 583263508Sdim/// These are family of methods whose result type is initially 'id', but 584263508Sdim/// but are candidate for the result type to be changed to 'instancetype'. 585263508Sdimenum ObjCInstanceTypeFamily { 586263508Sdim OIT_None, 587263508Sdim OIT_Array, 588263508Sdim OIT_Dictionary, 589263508Sdim OIT_Singleton, 590263508Sdim OIT_Init, 591263508Sdim OIT_ReturnsSelf 592263508Sdim}; 593263508Sdim 594239462Sdim/// \brief Smart pointer class that efficiently represents Objective-C method 595239462Sdim/// names. 596239462Sdim/// 597239462Sdim/// This class will either point to an IdentifierInfo or a 598193326Sed/// MultiKeywordSelector (which is private). This enables us to optimize 599198092Srdivacky/// selectors that take no arguments and selectors that take 1 argument, which 600193326Sed/// accounts for 78% of all selectors in Cocoa.h. 601193326Sedclass Selector { 602226633Sdim friend class Diagnostic; 603198092Srdivacky 604193326Sed enum IdentifierInfoFlag { 605239462Sdim // Empty selector = 0. 606193326Sed ZeroArg = 0x1, 607193326Sed OneArg = 0x2, 608239462Sdim MultiArg = 0x3, 609193326Sed ArgFlags = ZeroArg|OneArg 610193326Sed }; 611193326Sed uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo. 612198092Srdivacky 613193326Sed Selector(IdentifierInfo *II, unsigned nArgs) { 614193326Sed InfoPtr = reinterpret_cast<uintptr_t>(II); 615193326Sed assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 616193326Sed assert(nArgs < 2 && "nArgs not equal to 0/1"); 617193326Sed InfoPtr |= nArgs+1; 618193326Sed } 619193326Sed Selector(MultiKeywordSelector *SI) { 620193326Sed InfoPtr = reinterpret_cast<uintptr_t>(SI); 621193326Sed assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 622239462Sdim InfoPtr |= MultiArg; 623193326Sed } 624198092Srdivacky 625193326Sed IdentifierInfo *getAsIdentifierInfo() const { 626239462Sdim if (getIdentifierInfoFlag() < MultiArg) 627193326Sed return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 628193326Sed return 0; 629193326Sed } 630239462Sdim MultiKeywordSelector *getMultiKeywordSelector() const { 631239462Sdim return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 632239462Sdim } 633239462Sdim 634193326Sed unsigned getIdentifierInfoFlag() const { 635193326Sed return InfoPtr & ArgFlags; 636193326Sed } 637193326Sed 638221345Sdim static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 639221345Sdim 640193326Sedpublic: 641193326Sed friend class SelectorTable; // only the SelectorTable can create these 642193326Sed friend class DeclarationName; // and the AST's DeclarationName. 643193326Sed 644193326Sed /// The default ctor should only be used when creating data structures that 645193326Sed /// will contain selectors. 646193326Sed Selector() : InfoPtr(0) {} 647193326Sed Selector(uintptr_t V) : InfoPtr(V) {} 648193326Sed 649193326Sed /// operator==/!= - Indicate whether the specified selectors are identical. 650193326Sed bool operator==(Selector RHS) const { 651193326Sed return InfoPtr == RHS.InfoPtr; 652193326Sed } 653193326Sed bool operator!=(Selector RHS) const { 654193326Sed return InfoPtr != RHS.InfoPtr; 655193326Sed } 656193326Sed void *getAsOpaquePtr() const { 657193326Sed return reinterpret_cast<void*>(InfoPtr); 658193326Sed } 659193326Sed 660193326Sed /// \brief Determine whether this is the empty selector. 661193326Sed bool isNull() const { return InfoPtr == 0; } 662193326Sed 663193326Sed // Predicates to identify the selector type. 664198092Srdivacky bool isKeywordSelector() const { 665198092Srdivacky return getIdentifierInfoFlag() != ZeroArg; 666193326Sed } 667198092Srdivacky bool isUnarySelector() const { 668193326Sed return getIdentifierInfoFlag() == ZeroArg; 669193326Sed } 670193326Sed unsigned getNumArgs() const; 671218893Sdim 672218893Sdim 673218893Sdim /// \brief Retrieve the identifier at a given position in the selector. 674218893Sdim /// 675218893Sdim /// Note that the identifier pointer returned may be NULL. Clients that only 676218893Sdim /// care about the text of the identifier string, and not the specific, 677218893Sdim /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 678218893Sdim /// an empty string when the identifier pointer would be NULL. 679218893Sdim /// 680218893Sdim /// \param argIndex The index for which we want to retrieve the identifier. 681218893Sdim /// This index shall be less than \c getNumArgs() unless this is a keyword 682218893Sdim /// selector, in which case 0 is the only permissible value. 683218893Sdim /// 684218893Sdim /// \returns the uniqued identifier for this slot, or NULL if this slot has 685218893Sdim /// no corresponding identifier. 686193326Sed IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 687218893Sdim 688218893Sdim /// \brief Retrieve the name at a given position in the selector. 689218893Sdim /// 690218893Sdim /// \param argIndex The index for which we want to retrieve the name. 691218893Sdim /// This index shall be less than \c getNumArgs() unless this is a keyword 692218893Sdim /// selector, in which case 0 is the only permissible value. 693218893Sdim /// 694218893Sdim /// \returns the name for this slot, which may be the empty string if no 695218893Sdim /// name was supplied. 696226633Sdim StringRef getNameForSlot(unsigned argIndex) const; 697218893Sdim 698239462Sdim /// \brief Derive the full selector name (e.g. "foo:bar:") and return 699193326Sed /// it as an std::string. 700239462Sdim // FIXME: Add a print method that uses a raw_ostream. 701193326Sed std::string getAsString() const; 702198092Srdivacky 703239462Sdim /// \brief Derive the conventional family of this method. 704221345Sdim ObjCMethodFamily getMethodFamily() const { 705221345Sdim return getMethodFamilyImpl(*this); 706221345Sdim } 707221345Sdim 708193326Sed static Selector getEmptyMarker() { 709193326Sed return Selector(uintptr_t(-1)); 710193326Sed } 711193326Sed static Selector getTombstoneMarker() { 712193326Sed return Selector(uintptr_t(-2)); 713193326Sed } 714263508Sdim 715263508Sdim static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 716193326Sed}; 717193326Sed 718239462Sdim/// \brief This table allows us to fully hide how we implement 719193326Sed/// multi-keyword caching. 720193326Sedclass SelectorTable { 721193326Sed void *Impl; // Actually a SelectorTableImpl 722243830Sdim SelectorTable(const SelectorTable &) LLVM_DELETED_FUNCTION; 723243830Sdim void operator=(const SelectorTable &) LLVM_DELETED_FUNCTION; 724193326Sedpublic: 725193326Sed SelectorTable(); 726193326Sed ~SelectorTable(); 727193326Sed 728239462Sdim /// \brief Can create any sort of selector. 729239462Sdim /// 730239462Sdim /// \p NumArgs indicates whether this is a no argument selector "foo", a 731239462Sdim /// single argument selector "foo:" or multi-argument "foo:bar:". 732193326Sed Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 733198092Srdivacky 734193326Sed Selector getUnarySelector(IdentifierInfo *ID) { 735193326Sed return Selector(ID, 1); 736193326Sed } 737193326Sed Selector getNullarySelector(IdentifierInfo *ID) { 738193326Sed return Selector(ID, 0); 739193326Sed } 740193326Sed 741239462Sdim /// \brief Return the total amount of memory allocated for managing selectors. 742221345Sdim size_t getTotalMemory() const; 743221345Sdim 744263508Sdim /// \brief Return the default setter name for the given identifier. 745239462Sdim /// 746239462Sdim /// This is "set" + \p Name where the initial character of \p Name 747193326Sed /// has been capitalized. 748263508Sdim static SmallString<64> constructSetterName(StringRef Name); 749263508Sdim 750263508Sdim /// \brief Return the default setter selector for the given identifier. 751263508Sdim /// 752263508Sdim /// This is "set" + \p Name where the initial character of \p Name 753263508Sdim /// has been capitalized. 754263508Sdim static Selector constructSetterSelector(IdentifierTable &Idents, 755263508Sdim SelectorTable &SelTable, 756263508Sdim const IdentifierInfo *Name); 757193326Sed}; 758193326Sed 759193326Sed/// DeclarationNameExtra - Common base of the MultiKeywordSelector, 760193326Sed/// CXXSpecialName, and CXXOperatorIdName classes, all of which are 761193326Sed/// private classes that describe different kinds of names. 762193326Sedclass DeclarationNameExtra { 763193326Sedpublic: 764193326Sed /// ExtraKind - The kind of "extra" information stored in the 765193326Sed /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of 766193326Sed /// how these enumerator values are used. 767193326Sed enum ExtraKind { 768193326Sed CXXConstructor = 0, 769193326Sed CXXDestructor, 770193326Sed CXXConversionFunction, 771193326Sed#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \ 772193326Sed CXXOperator##Name, 773193326Sed#include "clang/Basic/OperatorKinds.def" 774199990Srdivacky CXXLiteralOperator, 775193326Sed CXXUsingDirective, 776193326Sed NUM_EXTRA_KINDS 777193326Sed }; 778193326Sed 779193326Sed /// ExtraKindOrNumArgs - Either the kind of C++ special name or 780193326Sed /// operator-id (if the value is one of the CXX* enumerators of 781193326Sed /// ExtraKind), in which case the DeclarationNameExtra is also a 782199990Srdivacky /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or 783199990Srdivacky /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName, 784199990Srdivacky /// it may be also name common to C++ using-directives (CXXUsingDirective), 785199990Srdivacky /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of 786193326Sed /// arguments in the Objective-C selector, in which case the 787193326Sed /// DeclarationNameExtra is also a MultiKeywordSelector. 788193326Sed unsigned ExtraKindOrNumArgs; 789193326Sed}; 790193326Sed 791193326Sed} // end namespace clang 792193326Sed 793193326Sednamespace llvm { 794193326Sed/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 795193326Sed/// DenseSets. 796193326Sedtemplate <> 797193326Sedstruct DenseMapInfo<clang::Selector> { 798193326Sed static inline clang::Selector getEmptyKey() { 799193326Sed return clang::Selector::getEmptyMarker(); 800193326Sed } 801193326Sed static inline clang::Selector getTombstoneKey() { 802198092Srdivacky return clang::Selector::getTombstoneMarker(); 803193326Sed } 804198092Srdivacky 805193326Sed static unsigned getHashValue(clang::Selector S); 806198092Srdivacky 807193326Sed static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 808193326Sed return LHS == RHS; 809193326Sed } 810193326Sed}; 811205219Srdivacky 812200583Srdivackytemplate <> 813200583Srdivackystruct isPodLike<clang::Selector> { static const bool value = true; }; 814193326Sed 815218893Sdimtemplate<> 816218893Sdimclass PointerLikeTypeTraits<clang::Selector> { 817218893Sdimpublic: 818218893Sdim static inline const void *getAsVoidPointer(clang::Selector P) { 819218893Sdim return P.getAsOpaquePtr(); 820218893Sdim } 821218893Sdim static inline clang::Selector getFromVoidPointer(const void *P) { 822218893Sdim return clang::Selector(reinterpret_cast<uintptr_t>(P)); 823218893Sdim } 824218893Sdim enum { NumLowBitsAvailable = 0 }; 825218893Sdim}; 826200583Srdivacky 827193326Sed// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 828193326Sed// are not guaranteed to be 8-byte aligned. 829193326Sedtemplate<> 830193326Sedclass PointerLikeTypeTraits<clang::IdentifierInfo*> { 831193326Sedpublic: 832193326Sed static inline void *getAsVoidPointer(clang::IdentifierInfo* P) { 833198092Srdivacky return P; 834193326Sed } 835193326Sed static inline clang::IdentifierInfo *getFromVoidPointer(void *P) { 836193326Sed return static_cast<clang::IdentifierInfo*>(P); 837193326Sed } 838193326Sed enum { NumLowBitsAvailable = 1 }; 839193326Sed}; 840193326Sed 841193326Sedtemplate<> 842193326Sedclass PointerLikeTypeTraits<const clang::IdentifierInfo*> { 843193326Sedpublic: 844193326Sed static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 845198092Srdivacky return P; 846193326Sed } 847193326Sed static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 848193326Sed return static_cast<const clang::IdentifierInfo*>(P); 849193326Sed } 850193326Sed enum { NumLowBitsAvailable = 1 }; 851193326Sed}; 852193326Sed 853193326Sed} // end namespace llvm 854193326Sed#endif 855