1//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the IdentifierInfo, IdentifierVisitor, and
10// IdentifierTable interfaces.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/IdentifierTable.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/DiagnosticLex.h"
17#include "clang/Basic/LangOptions.h"
18#include "clang/Basic/OperatorKinds.h"
19#include "clang/Basic/Specifiers.h"
20#include "clang/Basic/TargetBuiltins.h"
21#include "clang/Basic/TokenKinds.h"
22#include "llvm/ADT/DenseMapInfo.h"
23#include "llvm/ADT/FoldingSet.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringMap.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Support/Allocator.h"
28#include "llvm/Support/raw_ostream.h"
29#include <cassert>
30#include <cstdio>
31#include <cstring>
32#include <string>
33
34using namespace clang;
35
// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
// largest possible target/aux-target combination. If we exceed this, we likely
// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
//
// The right-hand side, 2 << (ObjCOrBuiltinIDBits - 1), equals
// 2^ObjCOrBuiltinIDBits, i.e. the number of distinct values that fit in that
// many bits. The factor of two on the left corresponds to the target/aux-target
// pair mentioned above.
static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)),
              "Insufficient ObjCOrBuiltinID Bits");
41
42//===----------------------------------------------------------------------===//
43// IdentifierTable Implementation
44//===----------------------------------------------------------------------===//
45
// Out-of-line, defaulted destructors for the two lookup interfaces.
// NOTE(review): presumably defined here (rather than in the header) so each
// class has a single home translation unit -- confirm against the header.
IdentifierIterator::~IdentifierIterator() = default;

IdentifierInfoLookup::~IdentifierInfoLookup() = default;
49
50namespace {
51
52/// A simple identifier lookup iterator that represents an
53/// empty sequence of identifiers.
54class EmptyLookupIterator : public IdentifierIterator {
55public:
56  StringRef Next() override { return StringRef(); }
57};
58
59} // namespace
60
61IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
62  return new EmptyLookupIterator();
63}
64
/// Creates an identifier table without registering any keywords.
///
/// \param ExternalLookup stored as-is in the ExternalLookup member.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Start with space for 8K identifiers.
      ExternalLookup(ExternalLookup) {}

/// Creates an identifier table and registers the keywords for \p LangOpts.
///
/// Delegates to the constructor above and then calls AddKeywords().
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  // Populate the identifier table with info about keywords for the current
  // language.
  AddKeywords(LangOpts);
}
76
77//===----------------------------------------------------------------------===//
78// Language Keyword Implementation
79//===----------------------------------------------------------------------===//
80
// Constants for TokenKinds.def
namespace {

  /// Bit flags used by TokenKinds.def to describe when a keyword applies.
  /// Every named flag up to KEYMAX occupies exactly one bit.
  enum TokenKey : unsigned {
    KEYC99        = 1u << 0,
    KEYCXX        = 1u << 1,
    KEYCXX11      = 1u << 2,
    KEYGNU        = 1u << 3,
    KEYMS         = 1u << 4,
    BOOLSUPPORT   = 1u << 5,
    KEYALTIVEC    = 1u << 6,
    KEYNOCXX      = 1u << 7,
    KEYBORLAND    = 1u << 8,
    KEYOPENCLC    = 1u << 9,
    KEYC23        = 1u << 10,
    KEYNOMS18     = 1u << 11,
    KEYNOOPENCL   = 1u << 12,
    WCHARSUPPORT  = 1u << 13,
    HALFSUPPORT   = 1u << 14,
    CHAR8SUPPORT  = 1u << 15,
    KEYOBJC       = 1u << 16,
    KEYZVECTOR    = 1u << 17,
    KEYCOROUTINES = 1u << 18,
    KEYMODULES    = 1u << 19,
    KEYCXX20      = 1u << 20,
    KEYOPENCLCXX  = 1u << 21,
    KEYMSCOMPAT   = 1u << 22,
    KEYSYCL       = 1u << 23,
    KEYCUDA       = 1u << 24,
    KEYHLSL       = 1u << 25,
    KEYFIXEDPOINT = 1u << 26,
    KEYMAX        = KEYFIXEDPOINT, // The maximum key
    KEYALLCXX     = KEYCXX | KEYCXX11 | KEYCXX20,
    // Every bit up to and including KEYMAX, except KEYNOMS18 and KEYNOOPENCL,
    // which are only used to exclude.
    KEYALL        = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL)
  };

  /// How a keyword is treated in the selected standard. The enumerators are
  /// deliberately ordered from least to most permissive so that the larger of
  /// two values is the one that 'wins'.
  enum KeywordStatus {
    KS_Unknown,   // Not yet calculated. Used when figuring out the status.
    KS_Disabled,  // Disabled
    KS_Future,    // Is a keyword in future standard
    KS_Extension, // Is an extension
    KS_Enabled,   // Enabled
  };

} // namespace
129
130// This works on a single TokenKey flag and checks the LangOpts to get the
131// KeywordStatus based exclusively on this flag, so that it can be merged in
132// getKeywordStatus. Most should be enabled/disabled, but some might imply
133// 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
134// be disabled, and the calling function makes it 'disabled' if no other flag
135// changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
136static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
137                                            TokenKey Flag) {
138  // Flag is a single bit version of TokenKey (that is, not
139  // KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
140  assert((Flag & ~(Flag - 1)) == Flag && "Multiple bits set?");
141
142  switch (Flag) {
143  case KEYC99:
144    if (LangOpts.C99)
145      return KS_Enabled;
146    return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
147  case KEYC23:
148    if (LangOpts.C23)
149      return KS_Enabled;
150    return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
151  case KEYCXX:
152    return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
153  case KEYCXX11:
154    if (LangOpts.CPlusPlus11)
155      return KS_Enabled;
156    return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
157  case KEYCXX20:
158    if (LangOpts.CPlusPlus20)
159      return KS_Enabled;
160    return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
161  case KEYGNU:
162    return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
163  case KEYMS:
164    return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
165  case BOOLSUPPORT:
166    if (LangOpts.Bool)      return KS_Enabled;
167    return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
168  case KEYALTIVEC:
169    return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
170  case KEYBORLAND:
171    return LangOpts.Borland ? KS_Extension : KS_Unknown;
172  case KEYOPENCLC:
173    return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
174                                                        : KS_Unknown;
175  case WCHARSUPPORT:
176    return LangOpts.WChar ? KS_Enabled : KS_Unknown;
177  case HALFSUPPORT:
178    return LangOpts.Half ? KS_Enabled : KS_Unknown;
179  case CHAR8SUPPORT:
180    if (LangOpts.Char8) return KS_Enabled;
181    if (LangOpts.CPlusPlus20) return KS_Unknown;
182    if (LangOpts.CPlusPlus) return KS_Future;
183    return KS_Unknown;
184  case KEYOBJC:
185    // We treat bridge casts as objective-C keywords so we can warn on them
186    // in non-arc mode.
187    return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
188  case KEYZVECTOR:
189    return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
190  case KEYCOROUTINES:
191    return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
192  case KEYMODULES:
193    return KS_Unknown;
194  case KEYOPENCLCXX:
195    return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
196  case KEYMSCOMPAT:
197    return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
198  case KEYSYCL:
199    return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
200  case KEYCUDA:
201    return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
202  case KEYHLSL:
203    return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
204  case KEYNOCXX:
205    // This is enabled in all non-C++ modes, but might be enabled for other
206    // reasons as well.
207    return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
208  case KEYNOOPENCL:
209    // The disable behavior for this is handled in getKeywordStatus.
210    return KS_Unknown;
211  case KEYNOMS18:
212    // The disable behavior for this is handled in getKeywordStatus.
213    return KS_Unknown;
214  case KEYFIXEDPOINT:
215    return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
216  default:
217    llvm_unreachable("Unknown KeywordStatus flag");
218  }
219}
220
221/// Translates flags as specified in TokenKinds.def into keyword status
222/// in the given language standard.
223static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
224                                      unsigned Flags) {
225  // KEYALL means always enabled, so special case this one.
226  if (Flags == KEYALL) return KS_Enabled;
227  // These are tests that need to 'always win', as they are special in that they
228  // disable based on certain conditions.
229  if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
230  if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
231      !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
232    return KS_Disabled;
233
234  KeywordStatus CurStatus = KS_Unknown;
235
236  while (Flags != 0) {
237    unsigned CurFlag = Flags & ~(Flags - 1);
238    Flags = Flags & ~CurFlag;
239    CurStatus = std::max(
240        CurStatus,
241        getKeywordStatusHelper(LangOpts, static_cast<TokenKey>(CurFlag)));
242  }
243
244  if (CurStatus == KS_Unknown)
245    return KS_Disabled;
246  return CurStatus;
247}
248
249/// AddKeyword - This method is used to associate a token ID with specific
250/// identifiers because they are language keywords.  This causes the lexer to
251/// automatically map matching identifiers to specialized token codes.
252static void AddKeyword(StringRef Keyword,
253                       tok::TokenKind TokenCode, unsigned Flags,
254                       const LangOptions &LangOpts, IdentifierTable &Table) {
255  KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
256
257  // Don't add this keyword if disabled in this language.
258  if (AddResult == KS_Disabled) return;
259
260  IdentifierInfo &Info =
261      Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
262  Info.setIsExtensionToken(AddResult == KS_Extension);
263  Info.setIsFutureCompatKeyword(AddResult == KS_Future);
264}
265
266/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
267/// representations.
268static void AddCXXOperatorKeyword(StringRef Keyword,
269                                  tok::TokenKind TokenCode,
270                                  IdentifierTable &Table) {
271  IdentifierInfo &Info = Table.get(Keyword, TokenCode);
272  Info.setIsCPlusPlusOperatorKeyword();
273}
274
275/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
276/// or "property".
277static void AddObjCKeyword(StringRef Name,
278                           tok::ObjCKeywordKind ObjCID,
279                           IdentifierTable &Table) {
280  Table.get(Name).setObjCKeywordID(ObjCID);
281}
282
283static void AddInterestingIdentifier(StringRef Name,
284                                     tok::InterestingIdentifierKind BTID,
285                                     IdentifierTable &Table) {
286  // Don't add 'not_interesting' identifier.
287  if (BTID != tok::not_interesting) {
288    IdentifierInfo &Info = Table.get(Name, tok::identifier);
289    Info.setInterestingIdentifierID(BTID);
290  }
291}
292
/// AddKeywords - Add all keywords to the symbol table.
///
/// Defines the X-macros consumed by TokenKinds.def so that each entry in that
/// file expands to a registration call on this table, then adds a few
/// keywords that are controlled by individual language options.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
  // Add keywords and tokens for the current language.
  // KEYWORD: register NAME as tok::kw_NAME, subject to FLAGS.
#define KEYWORD(NAME, FLAGS) \
  AddKeyword(StringRef(#NAME), tok::kw_ ## NAME,  \
             FLAGS, LangOpts, *this);
  // ALIAS: register the spelling NAME for the existing token tok::kw_TOK.
#define ALIAS(NAME, TOK, FLAGS) \
  AddKeyword(StringRef(NAME), tok::kw_ ## TOK,  \
             FLAGS, LangOpts, *this);
  // CXX_KEYWORD_OPERATOR: only registered when CXXOperatorNames is set.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
  if (LangOpts.CXXOperatorNames)          \
    AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
  // OBJC_AT_KEYWORD: only registered when compiling Objective-C.
#define OBJC_AT_KEYWORD(NAME)  \
  if (LangOpts.ObjC)           \
    AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
  // INTERESTING_IDENTIFIER: tag NAME with its interesting-identifier kind.
#define INTERESTING_IDENTIFIER(NAME)                                           \
  AddInterestingIdentifier(StringRef(#NAME), tok::NAME, *this);

  // TESTING_KEYWORD entries expand to nothing, so they are never registered.
#define TESTING_KEYWORD(NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"

  // The remaining keywords depend on a single language option each; passing
  // KEYALL makes AddKeyword register them unconditionally once the option is
  // set.
  if (LangOpts.ParseUnknownAnytype)
    AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
               LangOpts, *this);

  if (LangOpts.DeclSpecKeyword)
    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);

  // "__ieee128" is registered with the same token kind as __float128.
  if (LangOpts.IEEE128)
    AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);

  // Add the 'import' contextual keyword.
  get("import").setModulesImport(true);
}
328
/// Checks if the specified token kind represents a keyword in the
/// specified language.
///
/// \param LangOpts the language options to evaluate the keyword flags against.
/// \param K the token kind to look up.
/// \returns Status of the keyword in the language. Token kinds that have no
/// KEYWORD entry in TokenKinds.def are reported as KS_Disabled.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
                                      tok::TokenKind K) {
  switch (K) {
  // Each KEYWORD entry in TokenKinds.def becomes one case that evaluates the
  // entry's flags with getKeywordStatus.
#define KEYWORD(NAME, FLAGS) \
  case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
  default: return KS_Disabled;
  }
}
341
342/// Returns true if the identifier represents a keyword in the
343/// specified language.
344bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
345  switch (getTokenKwStatus(LangOpts, getTokenID())) {
346  case KS_Enabled:
347  case KS_Extension:
348    return true;
349  default:
350    return false;
351  }
352}
353
354/// Returns true if the identifier represents a C++ keyword in the
355/// specified language.
356bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
357  if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
358    return false;
359  // This is a C++ keyword if this identifier is not a keyword when checked
360  // using LangOptions without C++ support.
361  LangOptions LangOptsNoCPP = LangOpts;
362  LangOptsNoCPP.CPlusPlus = false;
363  LangOptsNoCPP.CPlusPlus11 = false;
364  LangOptsNoCPP.CPlusPlus20 = false;
365  return !isKeyword(LangOptsNoCPP);
366}
367
368ReservedIdentifierStatus
369IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
370  StringRef Name = getName();
371
372  // '_' is a reserved identifier, but its use is so common (e.g. to store
373  // ignored values) that we don't warn on it.
374  if (Name.size() <= 1)
375    return ReservedIdentifierStatus::NotReserved;
376
377  // [lex.name] p3
378  if (Name[0] == '_') {
379
380    // Each name that begins with an underscore followed by an uppercase letter
381    // or another underscore is reserved.
382    if (Name[1] == '_')
383      return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
384
385    if ('A' <= Name[1] && Name[1] <= 'Z')
386      return ReservedIdentifierStatus::
387          StartsWithUnderscoreFollowedByCapitalLetter;
388
389    // This is a bit misleading: it actually means it's only reserved if we're
390    // at global scope because it starts with an underscore.
391    return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
392  }
393
394  // Each name that contains a double underscore (__) is reserved.
395  if (LangOpts.CPlusPlus && Name.contains("__"))
396    return ReservedIdentifierStatus::ContainsDoubleUnderscore;
397
398  return ReservedIdentifierStatus::NotReserved;
399}
400
401ReservedLiteralSuffixIdStatus
402IdentifierInfo::isReservedLiteralSuffixId() const {
403  StringRef Name = getName();
404
405  if (Name[0] != '_')
406    return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
407
408  if (Name.contains("__"))
409    return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
410
411  return ReservedLiteralSuffixIdStatus::NotReserved;
412}
413
414StringRef IdentifierInfo::deuglifiedName() const {
415  StringRef Name = getName();
416  if (Name.size() >= 2 && Name.front() == '_' &&
417      (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
418    return Name.ltrim('_');
419  return Name;
420}
421
/// Maps this identifier's spelling to the preprocessor keyword kind it names,
/// or tok::pp_not_keyword if it does not name one.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
  // We use a perfect hash function here involving the length of the keyword,
  // the first and third character.  For preprocessor ID's there are no
  // collisions (if there were, the switch below would complain about duplicate
  // case values).  Note that this depends on 'if' being null terminated.

  // HASH packs the length into the bits above the low five and a 5-bit mix of
  // the first and third characters into the low five bits.
#define HASH(LEN, FIRST, THIRD) \
  (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
  // CASE emits one switch case for NAME; since unrelated identifiers can hash
  // to the same value, the full spelling is confirmed with memcmp.
#define CASE(LEN, FIRST, THIRD, NAME) \
  case HASH(LEN, FIRST, THIRD): \
    return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME

  unsigned Len = getLength();
  // No preprocessor keyword is shorter than two characters.
  if (Len < 2) return tok::pp_not_keyword;
  const char *Name = getNameStart();
  // For a two-character name, Name[2] is the terminating null (see above).
  switch (HASH(Len, Name[0], Name[2])) {
  default: return tok::pp_not_keyword;
  CASE( 2, 'i', '\0', if);
  CASE( 4, 'e', 'i', elif);
  CASE( 4, 'e', 's', else);
  CASE( 4, 'l', 'n', line);
  CASE( 4, 's', 'c', sccs);
  CASE( 5, 'e', 'd', endif);
  CASE( 5, 'e', 'r', error);
  CASE( 5, 'i', 'e', ident);
  CASE( 5, 'i', 'd', ifdef);
  CASE( 5, 'u', 'd', undef);

  CASE( 6, 'a', 's', assert);
  CASE( 6, 'd', 'f', define);
  CASE( 6, 'i', 'n', ifndef);
  CASE( 6, 'i', 'p', import);
  CASE( 6, 'p', 'a', pragma);

  CASE( 7, 'd', 'f', defined);
  CASE( 7, 'e', 'i', elifdef);
  CASE( 7, 'i', 'c', include);
  CASE( 7, 'w', 'r', warning);

  CASE( 8, 'e', 'i', elifndef);
  CASE( 8, 'u', 'a', unassert);
  CASE(12, 'i', 'c', include_next);

  CASE(14, '_', 'p', __public_macro);

  CASE(15, '_', 'p', __private_macro);

  CASE(16, '_', 'i', __include_macros);
#undef CASE
#undef HASH
  }
}
474
475//===----------------------------------------------------------------------===//
476// Stats Implementation
477//===----------------------------------------------------------------------===//
478
479/// PrintStats - Print statistics about how well the identifier table is doing
480/// at hashing identifiers.
481void IdentifierTable::PrintStats() const {
482  unsigned NumBuckets = HashTable.getNumBuckets();
483  unsigned NumIdentifiers = HashTable.getNumItems();
484  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
485  unsigned AverageIdentifierSize = 0;
486  unsigned MaxIdentifierLength = 0;
487
488  // TODO: Figure out maximum times an identifier had to probe for -stats.
489  for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
490       I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
491    unsigned IdLen = I->getKeyLength();
492    AverageIdentifierSize += IdLen;
493    if (MaxIdentifierLength < IdLen)
494      MaxIdentifierLength = IdLen;
495  }
496
497  fprintf(stderr, "\n*** Identifier Table Stats:\n");
498  fprintf(stderr, "# Identifiers:   %d\n", NumIdentifiers);
499  fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets);
500  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
501          NumIdentifiers/(double)NumBuckets);
502  fprintf(stderr, "Ave identifier length: %f\n",
503          (AverageIdentifierSize/(double)NumIdentifiers));
504  fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength);
505
506  // Compute statistics about the memory allocated for identifiers.
507  HashTable.getAllocator().PrintStats();
508}
509
510//===----------------------------------------------------------------------===//
511// SelectorTable Implementation
512//===----------------------------------------------------------------------===//
513
514unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
515  return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
516}
517
518bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
519  assert(!Names.empty() && "must have >= 1 selector slots");
520  if (getNumArgs() != Names.size())
521    return false;
522  for (unsigned I = 0, E = Names.size(); I != E; ++I) {
523    if (getNameForSlot(I) != Names[I])
524      return false;
525  }
526  return true;
527}
528
529bool Selector::isUnarySelector(StringRef Name) const {
530  return isUnarySelector() && getNameForSlot(0) == Name;
531}
532
533unsigned Selector::getNumArgs() const {
534  unsigned IIF = getIdentifierInfoFlag();
535  if (IIF <= ZeroArg)
536    return 0;
537  if (IIF == OneArg)
538    return 1;
539  // We point to a MultiKeywordSelector.
540  MultiKeywordSelector *SI = getMultiKeywordSelector();
541  return SI->getNumArgs();
542}
543
544IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
545  if (getIdentifierInfoFlag() < MultiArg) {
546    assert(argIndex == 0 && "illegal keyword index");
547    return getAsIdentifierInfo();
548  }
549
550  // We point to a MultiKeywordSelector.
551  MultiKeywordSelector *SI = getMultiKeywordSelector();
552  return SI->getIdentifierInfoForSlot(argIndex);
553}
554
555StringRef Selector::getNameForSlot(unsigned int argIndex) const {
556  IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
557  return II ? II->getName() : StringRef();
558}
559
560std::string MultiKeywordSelector::getName() const {
561  SmallString<256> Str;
562  llvm::raw_svector_ostream OS(Str);
563  for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
564    if (*I)
565      OS << (*I)->getName();
566    OS << ':';
567  }
568
569  return std::string(OS.str());
570}
571
572std::string Selector::getAsString() const {
573  if (isNull())
574    return "<null selector>";
575
576  if (getIdentifierInfoFlag() < MultiArg) {
577    IdentifierInfo *II = getAsIdentifierInfo();
578
579    if (getNumArgs() == 0) {
580      assert(II && "If the number of arguments is 0 then II is guaranteed to "
581                   "not be null.");
582      return std::string(II->getName());
583    }
584
585    if (!II)
586      return ":";
587
588    return II->getName().str() + ":";
589  }
590
591  // We have a multiple keyword selector.
592  return getMultiKeywordSelector()->getName();
593}
594
595void Selector::print(llvm::raw_ostream &OS) const {
596  OS << getAsString();
597}
598
/// Prints this selector to llvm::errs() via print().
LLVM_DUMP_METHOD void Selector::dump() const { print(llvm::errs()); }
600
601/// Interpreting the given string using the normal CamelCase
602/// conventions, determine whether the given string starts with the
603/// given "word", which is assumed to end in a lowercase letter.
604static bool startsWithWord(StringRef name, StringRef word) {
605  if (name.size() < word.size()) return false;
606  return ((name.size() == word.size() || !isLowercase(name[word.size()])) &&
607          name.starts_with(word));
608}
609
610ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
611  IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
612  if (!first) return OMF_None;
613
614  StringRef name = first->getName();
615  if (sel.isUnarySelector()) {
616    if (name == "autorelease") return OMF_autorelease;
617    if (name == "dealloc") return OMF_dealloc;
618    if (name == "finalize") return OMF_finalize;
619    if (name == "release") return OMF_release;
620    if (name == "retain") return OMF_retain;
621    if (name == "retainCount") return OMF_retainCount;
622    if (name == "self") return OMF_self;
623    if (name == "initialize") return OMF_initialize;
624  }
625
626  if (name == "performSelector" || name == "performSelectorInBackground" ||
627      name == "performSelectorOnMainThread")
628    return OMF_performSelector;
629
630  // The other method families may begin with a prefix of underscores.
631  name = name.ltrim('_');
632
633  if (name.empty()) return OMF_None;
634  switch (name.front()) {
635  case 'a':
636    if (startsWithWord(name, "alloc")) return OMF_alloc;
637    break;
638  case 'c':
639    if (startsWithWord(name, "copy")) return OMF_copy;
640    break;
641  case 'i':
642    if (startsWithWord(name, "init")) return OMF_init;
643    break;
644  case 'm':
645    if (startsWithWord(name, "mutableCopy")) return OMF_mutableCopy;
646    break;
647  case 'n':
648    if (startsWithWord(name, "new")) return OMF_new;
649    break;
650  default:
651    break;
652  }
653
654  return OMF_None;
655}
656
657ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
658  IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
659  if (!first) return OIT_None;
660
661  StringRef name = first->getName();
662
663  if (name.empty()) return OIT_None;
664  switch (name.front()) {
665    case 'a':
666      if (startsWithWord(name, "array")) return OIT_Array;
667      break;
668    case 'd':
669      if (startsWithWord(name, "default")) return OIT_ReturnsSelf;
670      if (startsWithWord(name, "dictionary")) return OIT_Dictionary;
671      break;
672    case 's':
673      if (startsWithWord(name, "shared")) return OIT_ReturnsSelf;
674      if (startsWithWord(name, "standard")) return OIT_Singleton;
675      break;
676    case 'i':
677      if (startsWithWord(name, "init")) return OIT_Init;
678      break;
679    default:
680      break;
681  }
682  return OIT_None;
683}
684
685ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
686  IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
687  if (!first) return SFF_None;
688
689  StringRef name = first->getName();
690
691  switch (name.front()) {
692    case 'a':
693      if (name == "appendFormat") return SFF_NSString;
694      break;
695
696    case 'i':
697      if (name == "initWithFormat") return SFF_NSString;
698      break;
699
700    case 'l':
701      if (name == "localizedStringWithFormat") return SFF_NSString;
702      break;
703
704    case 's':
705      if (name == "stringByAppendingFormat" ||
706          name == "stringWithFormat") return SFF_NSString;
707      break;
708  }
709  return SFF_None;
710}
711
namespace {

/// Private state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// Folding set used by getSelector to keep one MultiKeywordSelector per
  /// distinct profile.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// Allocator that getSelector uses for MultiKeywordSelector storage.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
720
721static SelectorTableImpl &getSelectorTableImpl(void *P) {
722  return *static_cast<SelectorTableImpl*>(P);
723}
724
725SmallString<64>
726SelectorTable::constructSetterName(StringRef Name) {
727  SmallString<64> SetterName("set");
728  SetterName += Name;
729  SetterName[3] = toUppercase(SetterName[3]);
730  return SetterName;
731}
732
733Selector
734SelectorTable::constructSetterSelector(IdentifierTable &Idents,
735                                       SelectorTable &SelTable,
736                                       const IdentifierInfo *Name) {
737  IdentifierInfo *SetterName =
738    &Idents.get(constructSetterName(Name->getName()));
739  return SelTable.getUnarySelector(SetterName);
740}
741
742std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
743  StringRef Name = Sel.getNameForSlot(0);
744  assert(Name.starts_with("set") && "invalid setter name");
745  return (Twine(toLowercase(Name[3])) + Name.drop_front(4)).str();
746}
747
748size_t SelectorTable::getTotalMemory() const {
749  SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
750  return SelTabImpl.Allocator.getTotalMemory();
751}
752
753Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
754  if (nKeys < 2)
755    return Selector(IIV[0], nKeys);
756
757  SelectorTableImpl &SelTabImpl = getSelectorTableImpl(Impl);
758
759  // Unique selector, to guarantee there is one per name.
760  llvm::FoldingSetNodeID ID;
761  MultiKeywordSelector::Profile(ID, IIV, nKeys);
762
763  void *InsertPos = nullptr;
764  if (MultiKeywordSelector *SI =
765        SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
766    return Selector(SI);
767
768  // MultiKeywordSelector objects are not allocated with new because they have a
769  // variable size array (for parameter types) at the end of them.
770  unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
771  MultiKeywordSelector *SI =
772      (MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
773          Size, alignof(MultiKeywordSelector));
774  new (SI) MultiKeywordSelector(nKeys, IIV);
775  SelTabImpl.Table.InsertNode(SI, InsertPos);
776  return Selector(SI);
777}
778
/// Allocates the private SelectorTableImpl and stores it in the Impl pointer.
SelectorTable::SelectorTable() {
  Impl = new SelectorTableImpl();
}

/// Deletes the SelectorTableImpl allocated by the constructor.
SelectorTable::~SelectorTable() {
  delete &getSelectorTableImpl(Impl);
}
786
/// Returns the spelling of the overloaded operator \p Operator as listed in
/// OperatorKinds.def, or nullptr for OO_None and NUM_OVERLOADED_OPERATORS.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
  switch (Operator) {
  // The two non-operator enumerators have no spelling.
  case OO_None:
  case NUM_OVERLOADED_OPERATORS:
    return nullptr;

  // One case per OVERLOADED_OPERATOR entry, returning that entry's Spelling.
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
  case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
  }

  llvm_unreachable("Invalid OverloadedOperatorKind!");
}
800
801StringRef clang::getNullabilitySpelling(NullabilityKind kind,
802                                        bool isContextSensitive) {
803  switch (kind) {
804  case NullabilityKind::NonNull:
805    return isContextSensitive ? "nonnull" : "_Nonnull";
806
807  case NullabilityKind::Nullable:
808    return isContextSensitive ? "nullable" : "_Nullable";
809
810  case NullabilityKind::NullableResult:
811    assert(!isContextSensitive &&
812           "_Nullable_result isn't supported as context-sensitive keyword");
813    return "_Nullable_result";
814
815  case NullabilityKind::Unspecified:
816    return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
817  }
818  llvm_unreachable("Unknown nullability kind.");
819}
820
821llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
822                                     NullabilityKind NK) {
823  switch (NK) {
824  case NullabilityKind::NonNull:
825    return OS << "NonNull";
826  case NullabilityKind::Nullable:
827    return OS << "Nullable";
828  case NullabilityKind::NullableResult:
829    return OS << "NullableResult";
830  case NullabilityKind::Unspecified:
831    return OS << "Unspecified";
832  }
833  llvm_unreachable("Unknown nullability kind.");
834}
835
/// Picks the "keyword in a newer standard" warning for a future-compat
/// keyword.
///
/// \param II identifier that must be flagged as a future-compat keyword.
/// \param LangOpts language options; CPlusPlus selects between the C++ and C
/// diagnostics.
/// \returns the C++11/C++20 warning in C++ mode, or the C99/C23 warning
/// otherwise, depending on the keyword's flags in TokenKinds.def.
diag::kind
IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
                                         const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  // Look up the flags of the KEYWORD entry whose name equals II's spelling;
  // each KEYWORD entry in TokenKinds.def contributes one .Case to the switch.
  unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
      ;

  if (LangOpts.CPlusPlus) {
    // C++11 is tested first, so a keyword carrying both the C++11 and C++20
    // flags gets the C++11 warning.
    if ((Flags & KEYCXX11) == KEYCXX11)
      return diag::warn_cxx11_keyword;

    // char8_t is not modeled as a CXX20_KEYWORD because it's not
    // unconditionally enabled in C++20 mode. (It can be disabled
    // by -fno-char8_t.)
    if (((Flags & KEYCXX20) == KEYCXX20) ||
        ((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
      return diag::warn_cxx20_keyword;
  } else {
    if ((Flags & KEYC99) == KEYC99)
      return diag::warn_c99_keyword;
    if ((Flags & KEYC23) == KEYC23)
      return diag::warn_c23_keyword;
  }

  // Reaching this point means the keyword carries none of the flags above.
  llvm_unreachable(
      "Keyword not known to come from a newer Standard or proposed Standard");
}
867