IdentifierTable.cpp revision 207619
1//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IdentifierInfo, IdentifierVisitor, and
11// IdentifierTable interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/IdentifierTable.h"
16#include "clang/Basic/LangOptions.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/raw_ostream.h"
21#include <cstdio>
22
23using namespace clang;
24
25//===----------------------------------------------------------------------===//
26// IdentifierInfo Implementation
27//===----------------------------------------------------------------------===//
28
29IdentifierInfo::IdentifierInfo() {
30  TokenID = tok::identifier;
31  ObjCOrBuiltinID = 0;
32  HasMacro = false;
33  IsExtension = false;
34  IsPoisoned = false;
35  IsCPPOperatorKeyword = false;
36  NeedsHandleIdentifier = false;
37  FETokenInfo = 0;
38  Entry = 0;
39}
40
41//===----------------------------------------------------------------------===//
42// IdentifierTable Implementation
43//===----------------------------------------------------------------------===//
44
45IdentifierInfoLookup::~IdentifierInfoLookup() {}
46
47ExternalIdentifierLookup::~ExternalIdentifierLookup() {}
48
49IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
50                                 IdentifierInfoLookup* externalLookup)
51  : HashTable(8192), // Start with space for 8K identifiers.
52    ExternalLookup(externalLookup) {
53
54  // Populate the identifier table with info about keywords for the current
55  // language.
56  AddKeywords(LangOpts);
57}
58
59//===----------------------------------------------------------------------===//
60// Language Keyword Implementation
61//===----------------------------------------------------------------------===//
62
63// Constants for TokenKinds.def
64namespace {
65  enum {
66    KEYALL = 1,
67    KEYC99 = 2,
68    KEYCXX = 4,
69    KEYCXX0X = 8,
70    KEYGNU = 16,
71    KEYMS = 32,
72    BOOLSUPPORT = 64,
73    KEYALTIVEC = 128
74  };
75}
76
77/// AddKeyword - This method is used to associate a token ID with specific
78/// identifiers because they are language keywords.  This causes the lexer to
79/// automatically map matching identifiers to specialized token codes.
80///
81/// The C90/C99/CPP/CPP0x flags are set to 2 if the token should be
82/// enabled in the specified langauge, set to 1 if it is an extension
83/// in the specified language, and set to 0 if disabled in the
84/// specified language.
85static void AddKeyword(llvm::StringRef Keyword,
86                       tok::TokenKind TokenCode, unsigned Flags,
87                       const LangOptions &LangOpts, IdentifierTable &Table) {
88  unsigned AddResult = 0;
89  if (Flags & KEYALL) AddResult = 2;
90  else if (LangOpts.CPlusPlus && (Flags & KEYCXX)) AddResult = 2;
91  else if (LangOpts.CPlusPlus0x && (Flags & KEYCXX0X)) AddResult = 2;
92  else if (LangOpts.C99 && (Flags & KEYC99)) AddResult = 2;
93  else if (LangOpts.GNUKeywords && (Flags & KEYGNU)) AddResult = 1;
94  else if (LangOpts.Microsoft && (Flags & KEYMS)) AddResult = 1;
95  else if (LangOpts.Bool && (Flags & BOOLSUPPORT)) AddResult = 2;
96  else if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) AddResult = 2;
97
98  // Don't add this keyword if disabled in this language.
99  if (AddResult == 0) return;
100
101  IdentifierInfo &Info = Table.get(Keyword);
102  Info.setTokenID(TokenCode);
103  Info.setIsExtensionToken(AddResult == 1);
104}
105
106/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
107/// representations.
108static void AddCXXOperatorKeyword(llvm::StringRef Keyword,
109                                  tok::TokenKind TokenCode,
110                                  IdentifierTable &Table) {
111  IdentifierInfo &Info = Table.get(Keyword);
112  Info.setTokenID(TokenCode);
113  Info.setIsCPlusPlusOperatorKeyword();
114}
115
116/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or
117/// "property".
118static void AddObjCKeyword(llvm::StringRef Name,
119                           tok::ObjCKeywordKind ObjCID,
120                           IdentifierTable &Table) {
121  Table.get(Name).setObjCKeywordID(ObjCID);
122}
123
124/// AddKeywords - Add all keywords to the symbol table.
125///
126void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
127  // Add keywords and tokens for the current language.
128#define KEYWORD(NAME, FLAGS) \
129  AddKeyword(llvm::StringRef(#NAME), tok::kw_ ## NAME,  \
130             FLAGS, LangOpts, *this);
131#define ALIAS(NAME, TOK, FLAGS) \
132  AddKeyword(llvm::StringRef(NAME), tok::kw_ ## TOK,  \
133             FLAGS, LangOpts, *this);
134#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
135  if (LangOpts.CXXOperatorNames)          \
136    AddCXXOperatorKeyword(llvm::StringRef(#NAME), tok::ALIAS, *this);
137#define OBJC1_AT_KEYWORD(NAME) \
138  if (LangOpts.ObjC1)          \
139    AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this);
140#define OBJC2_AT_KEYWORD(NAME) \
141  if (LangOpts.ObjC2)          \
142    AddObjCKeyword(llvm::StringRef(#NAME), tok::objc_##NAME, *this);
143#include "clang/Basic/TokenKinds.def"
144}
145
146tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
147  // We use a perfect hash function here involving the length of the keyword,
148  // the first and third character.  For preprocessor ID's there are no
149  // collisions (if there were, the switch below would complain about duplicate
150  // case values).  Note that this depends on 'if' being null terminated.
151
152#define HASH(LEN, FIRST, THIRD) \
153  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
154#define CASE(LEN, FIRST, THIRD, NAME) \
155  case HASH(LEN, FIRST, THIRD): \
156    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
157
158  unsigned Len = getLength();
159  if (Len < 2) return tok::pp_not_keyword;
160  const char *Name = getNameStart();
161  switch (HASH(Len, Name[0], Name[2])) {
162  default: return tok::pp_not_keyword;
163  CASE( 2, 'i', '\0', if);
164  CASE( 4, 'e', 'i', elif);
165  CASE( 4, 'e', 's', else);
166  CASE( 4, 'l', 'n', line);
167  CASE( 4, 's', 'c', sccs);
168  CASE( 5, 'e', 'd', endif);
169  CASE( 5, 'e', 'r', error);
170  CASE( 5, 'i', 'e', ident);
171  CASE( 5, 'i', 'd', ifdef);
172  CASE( 5, 'u', 'd', undef);
173
174  CASE( 6, 'a', 's', assert);
175  CASE( 6, 'd', 'f', define);
176  CASE( 6, 'i', 'n', ifndef);
177  CASE( 6, 'i', 'p', import);
178  CASE( 6, 'p', 'a', pragma);
179
180  CASE( 7, 'd', 'f', defined);
181  CASE( 7, 'i', 'c', include);
182  CASE( 7, 'w', 'r', warning);
183
184  CASE( 8, 'u', 'a', unassert);
185  CASE(12, 'i', 'c', include_next);
186
187  CASE(16, '_', 'i', __include_macros);
188#undef CASE
189#undef HASH
190  }
191}
192
193//===----------------------------------------------------------------------===//
194// Stats Implementation
195//===----------------------------------------------------------------------===//
196
197/// PrintStats - Print statistics about how well the identifier table is doing
198/// at hashing identifiers.
199void IdentifierTable::PrintStats() const {
200  unsigned NumBuckets = HashTable.getNumBuckets();
201  unsigned NumIdentifiers = HashTable.getNumItems();
202  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
203  unsigned AverageIdentifierSize = 0;
204  unsigned MaxIdentifierLength = 0;
205
206  // TODO: Figure out maximum times an identifier had to probe for -stats.
207  for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
208       I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
209    unsigned IdLen = I->getKeyLength();
210    AverageIdentifierSize += IdLen;
211    if (MaxIdentifierLength < IdLen)
212      MaxIdentifierLength = IdLen;
213  }
214
215  fprintf(stderr, "\n*** Identifier Table Stats:\n");
216  fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
217  fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
218  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
219          NumIdentifiers/(double)NumBuckets);
220  fprintf(stderr, "Ave identifier length: %f\n",
221          (AverageIdentifierSize/(double)NumIdentifiers));
222  fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
223
224  // Compute statistics about the memory allocated for identifiers.
225  HashTable.getAllocator().PrintStats();
226}
227
228//===----------------------------------------------------------------------===//
229// SelectorTable Implementation
230//===----------------------------------------------------------------------===//
231
232unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
233  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
234}
235
236namespace clang {
237/// MultiKeywordSelector - One of these variable length records is kept for each
238/// selector containing more than one keyword. We use a folding set
239/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
240/// this class is provided strictly through Selector.
241class MultiKeywordSelector
242  : public DeclarationNameExtra, public llvm::FoldingSetNode {
243  MultiKeywordSelector(unsigned nKeys) {
244    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
245  }
246public:
247  // Constructor for keyword selectors.
248  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV) {
249    assert((nKeys > 1) && "not a multi-keyword selector");
250    ExtraKindOrNumArgs = NUM_EXTRA_KINDS + nKeys;
251
252    // Fill in the trailing keyword array.
253    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this+1);
254    for (unsigned i = 0; i != nKeys; ++i)
255      KeyInfo[i] = IIV[i];
256  }
257
258  // getName - Derive the full selector name and return it.
259  std::string getName() const;
260
261  unsigned getNumArgs() const { return ExtraKindOrNumArgs - NUM_EXTRA_KINDS; }
262
263  typedef IdentifierInfo *const *keyword_iterator;
264  keyword_iterator keyword_begin() const {
265    return reinterpret_cast<keyword_iterator>(this+1);
266  }
267  keyword_iterator keyword_end() const {
268    return keyword_begin()+getNumArgs();
269  }
270  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
271    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
272    return keyword_begin()[i];
273  }
274  static void Profile(llvm::FoldingSetNodeID &ID,
275                      keyword_iterator ArgTys, unsigned NumArgs) {
276    ID.AddInteger(NumArgs);
277    for (unsigned i = 0; i != NumArgs; ++i)
278      ID.AddPointer(ArgTys[i]);
279  }
280  void Profile(llvm::FoldingSetNodeID &ID) {
281    Profile(ID, keyword_begin(), getNumArgs());
282  }
283};
284} // end namespace clang.
285
286unsigned Selector::getNumArgs() const {
287  unsigned IIF = getIdentifierInfoFlag();
288  if (IIF == ZeroArg)
289    return 0;
290  if (IIF == OneArg)
291    return 1;
292  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
293  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
294  return SI->getNumArgs();
295}
296
297IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
298  if (getIdentifierInfoFlag()) {
299    assert(argIndex == 0 && "illegal keyword index");
300    return getAsIdentifierInfo();
301  }
302  // We point to a MultiKeywordSelector (pointer doesn't contain any flags).
303  MultiKeywordSelector *SI = reinterpret_cast<MultiKeywordSelector *>(InfoPtr);
304  return SI->getIdentifierInfoForSlot(argIndex);
305}
306
307std::string MultiKeywordSelector::getName() const {
308  llvm::SmallString<256> Str;
309  llvm::raw_svector_ostream OS(Str);
310  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
311    if (*I)
312      OS << (*I)->getName();
313    OS << ':';
314  }
315
316  return OS.str();
317}
318
319std::string Selector::getAsString() const {
320  if (InfoPtr == 0)
321    return "<null selector>";
322
323  if (InfoPtr & ArgFlags) {
324    IdentifierInfo *II = getAsIdentifierInfo();
325
326    // If the number of arguments is 0 then II is guaranteed to not be null.
327    if (getNumArgs() == 0)
328      return II->getName();
329
330    if (!II)
331      return ":";
332
333    return II->getName().str() + ":";
334  }
335
336  // We have a multiple keyword selector (no embedded flags).
337  return reinterpret_cast<MultiKeywordSelector *>(InfoPtr)->getName();
338}
339
340
341namespace {
342  struct SelectorTableImpl {
343    llvm::FoldingSet<MultiKeywordSelector> Table;
344    llvm::BumpPtrAllocator Allocator;
345  };
346} // end anonymous namespace.
347
348static SelectorTableImpl &getSelectorTableImpl(void *P) {
349  return *static_cast<SelectorTableImpl*>(P);
350}
351
352
353Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
354  if (nKeys < 2)
355    return Selector(IIV[0], nKeys);
356
357  SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
358
359  // Unique selector, to guarantee there is one per name.
360  llvm::FoldingSetNodeID ID;
361  MultiKeywordSelector::Profile(ID, IIV, nKeys);
362
363  void *InsertPos = 0;
364  if (MultiKeywordSelector *SI =
365        SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
366    return Selector(SI);
367
368  // MultiKeywordSelector objects are not allocated with new because they have a
369  // variable size array (for parameter types) at the end of them.
370  unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
371  MultiKeywordSelector *SI =
372    (MultiKeywordSelector*)SelTabImpl.Allocator.Allocate(Size,
373                                         llvm::alignof<MultiKeywordSelector>());
374  new (SI) MultiKeywordSelector(nKeys, IIV);
375  SelTabImpl.Table.InsertNode(SI, InsertPos);
376  return Selector(SI);
377}
378
379SelectorTable::SelectorTable() {
380  Impl = new SelectorTableImpl();
381}
382
383SelectorTable::~SelectorTable() {
384  delete &getSelectorTableImpl(Impl);
385}
386
387const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
388  switch (Operator) {
389  case OO_None:
390  case NUM_OVERLOADED_OPERATORS:
391    return 0;
392
393#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
394  case OO_##Name: return Spelling;
395#include "clang/Basic/OperatorKinds.def"
396  }
397
398  return 0;
399}
400
401