1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements a hash set that can be used to remove duplication of 11249423Sdim// nodes in a graph. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#include "llvm/ADT/FoldingSet.h" 16234353Sdim#include "llvm/ADT/Hashing.h" 17205407Srdivacky#include "llvm/Support/Allocator.h" 18198090Srdivacky#include "llvm/Support/ErrorHandling.h" 19249423Sdim#include "llvm/Support/Host.h" 20193323Sed#include "llvm/Support/MathExtras.h" 21193323Sed#include <cassert> 22193323Sed#include <cstring> 23193323Sedusing namespace llvm; 24193323Sed 25193323Sed//===----------------------------------------------------------------------===// 26212904Sdim// FoldingSetNodeIDRef Implementation 27212904Sdim 28212904Sdim/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 29212904Sdim/// used to lookup the node in the FoldingSetImpl. 30212904Sdimunsigned FoldingSetNodeIDRef::ComputeHash() const { 31234353Sdim return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); 32212904Sdim} 33212904Sdim 34212904Sdimbool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 35212904Sdim if (Size != RHS.Size) return false; 36212904Sdim return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 37212904Sdim} 38212904Sdim 39243830Sdim/// Used to compare the "ordering" of two nodes as defined by the 40243830Sdim/// profiled bits and their ordering defined by memcmp(). 41243830Sdimbool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { 42243830Sdim if (Size != RHS.Size) 43243830Sdim return Size < RHS.Size; 44243830Sdim return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; 45243830Sdim} 46243830Sdim 47212904Sdim//===----------------------------------------------------------------------===// 48193323Sed// FoldingSetNodeID Implementation 49193323Sed 50193323Sed/// Add* - Add various data types to Bit data. 51193323Sed/// 52193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) { 53193323Sed // Note: this adds pointers to the hash using sizes and endianness that 54288943Sdim // depend on the host. It doesn't matter, however, because hashing on 55288943Sdim // pointer values is inherently unstable. Nothing should depend on the 56193323Sed // ordering of nodes in the folding set. 57226633Sdim Bits.append(reinterpret_cast<unsigned *>(&Ptr), 58226633Sdim reinterpret_cast<unsigned *>(&Ptr+1)); 59193323Sed} 60193323Sedvoid FoldingSetNodeID::AddInteger(signed I) { 61193323Sed Bits.push_back(I); 62193323Sed} 63193323Sedvoid FoldingSetNodeID::AddInteger(unsigned I) { 64193323Sed Bits.push_back(I); 65193323Sed} 66193323Sedvoid FoldingSetNodeID::AddInteger(long I) { 67193323Sed AddInteger((unsigned long)I); 68193323Sed} 69193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) { 70193323Sed if (sizeof(long) == sizeof(int)) 71193323Sed AddInteger(unsigned(I)); 72193323Sed else if (sizeof(long) == sizeof(long long)) { 73193323Sed AddInteger((unsigned long long)I); 74193323Sed } else { 75198090Srdivacky llvm_unreachable("unexpected sizeof(long)"); 76193323Sed } 77193323Sed} 78193323Sedvoid FoldingSetNodeID::AddInteger(long long I) { 79193323Sed AddInteger((unsigned long long)I); 80193323Sed} 81193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) { 82193323Sed AddInteger(unsigned(I)); 83223017Sdim if ((uint64_t)(unsigned)I != I) 84193323Sed Bits.push_back(unsigned(I >> 32)); 85193323Sed} 86193323Sed 87198090Srdivackyvoid FoldingSetNodeID::AddString(StringRef String) { 88198090Srdivacky unsigned Size = String.size(); 89193323Sed Bits.push_back(Size); 90193323Sed if (!Size) return; 91193323Sed 92193323Sed unsigned Units = Size / 4; 93193323Sed unsigned Pos = 0; 94198090Srdivacky const unsigned *Base = (const unsigned*) String.data(); 95193323Sed 96193323Sed // If the string is aligned do a bulk transfer. 97193323Sed if (!((intptr_t)Base & 3)) { 98193323Sed Bits.append(Base, Base + Units); 99193323Sed Pos = (Units + 1) * 4; 100193323Sed } else { 101193323Sed // Otherwise do it the hard way. 102218893Sdim // To be compatible with above bulk transfer, we need to take endianness 103218893Sdim // into account. 104288943Sdim static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost, 105288943Sdim "Unexpected host endianness"); 106251662Sdim if (sys::IsBigEndianHost) { 107218893Sdim for (Pos += 4; Pos <= Size; Pos += 4) { 108218893Sdim unsigned V = ((unsigned char)String[Pos - 4] << 24) | 109218893Sdim ((unsigned char)String[Pos - 3] << 16) | 110218893Sdim ((unsigned char)String[Pos - 2] << 8) | 111218893Sdim (unsigned char)String[Pos - 1]; 112218893Sdim Bits.push_back(V); 113218893Sdim } 114288943Sdim } else { // Little-endian host 115218893Sdim for (Pos += 4; Pos <= Size; Pos += 4) { 116218893Sdim unsigned V = ((unsigned char)String[Pos - 1] << 24) | 117218893Sdim ((unsigned char)String[Pos - 2] << 16) | 118218893Sdim ((unsigned char)String[Pos - 3] << 8) | 119218893Sdim (unsigned char)String[Pos - 4]; 120218893Sdim Bits.push_back(V); 121218893Sdim } 122193323Sed } 123193323Sed } 124193323Sed 125193323Sed // With the leftover bits. 126193323Sed unsigned V = 0; 127218893Sdim // Pos will have overshot size by 4 - #bytes left over. 128218893Sdim // No need to take endianness into account here - this is always executed. 129193323Sed switch (Pos - Size) { 130193323Sed case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 131193323Sed case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 132193323Sed case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 133193323Sed default: return; // Nothing left. 134193323Sed } 135193323Sed 136193323Sed Bits.push_back(V); 137193323Sed} 138193323Sed 139221345Sdim// AddNodeID - Adds the Bit data of another ID to *this. 140221345Sdimvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { 141221345Sdim Bits.append(ID.Bits.begin(), ID.Bits.end()); 142221345Sdim} 143221345Sdim 144193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 145193323Sed/// lookup the node in the FoldingSetImpl. 146193323Sedunsigned FoldingSetNodeID::ComputeHash() const { 147212904Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 148193323Sed} 149193323Sed 150193323Sed/// operator== - Used to compare two nodes to each other. 151193323Sed/// 152249423Sdimbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { 153212904Sdim return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 154193323Sed} 155193323Sed 156212904Sdim/// operator== - Used to compare two nodes to each other. 157212904Sdim/// 158212904Sdimbool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 159212904Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 160212904Sdim} 161212904Sdim 162243830Sdim/// Used to compare the "ordering" of two nodes as defined by the 163243830Sdim/// profiled bits and their ordering defined by memcmp(). 164249423Sdimbool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { 165243830Sdim return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 166243830Sdim} 167243830Sdim 168243830Sdimbool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { 169243830Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; 170243830Sdim} 171243830Sdim 172205407Srdivacky/// Intern - Copy this node's data to a memory region allocated from the 173205407Srdivacky/// given allocator and return a FoldingSetNodeIDRef describing the 174205407Srdivacky/// interned data. 175205407SrdivackyFoldingSetNodeIDRef 176205407SrdivackyFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 177205407Srdivacky unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 178205407Srdivacky std::uninitialized_copy(Bits.begin(), Bits.end(), New); 179205407Srdivacky return FoldingSetNodeIDRef(New, Bits.size()); 180205407Srdivacky} 181193323Sed 182193323Sed//===----------------------------------------------------------------------===// 183193323Sed/// Helper functions for FoldingSetImpl. 184193323Sed 185193323Sed/// GetNextPtr - In order to save space, each bucket is a 186193323Sed/// singly-linked-list. In order to make deletion more efficient, we make 187193323Sed/// the list circular, so we can delete a node without computing its hash. 188193323Sed/// The problem with this is that the start of the hash buckets are not 189193323Sed/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 190193323Sed/// use GetBucketPtr when this happens. 191193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 192193323Sed // The low bit is set if this is the pointer back to the bucket. 193193323Sed if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 194276479Sdim return nullptr; 195193323Sed 196193323Sed return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 197193323Sed} 198193323Sed 199193323Sed 200193323Sed/// testing. 201193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) { 202193323Sed intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 203193323Sed assert((Ptr & 1) && "Not a bucket pointer"); 204193323Sed return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 205193323Sed} 206193323Sed 207193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for 208193323Sed/// the specified ID. 209212904Sdimstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 210193323Sed // NumBuckets is always a power of 2. 211212904Sdim unsigned BucketNum = Hash & (NumBuckets-1); 212193323Sed return Buckets + BucketNum; 213193323Sed} 214193323Sed 215210299Sed/// AllocateBuckets - Allocated initialized bucket memory. 216210299Sedstatic void **AllocateBuckets(unsigned NumBuckets) { 217210299Sed void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); 218210299Sed // Set the very last bucket to be a non-null "pointer". 219210299Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 220210299Sed return Buckets; 221210299Sed} 222210299Sed 223193323Sed//===----------------------------------------------------------------------===// 224193323Sed// FoldingSetImpl Implementation 225193323Sed 226288943Sdimvoid FoldingSetImpl::anchor() {} 227288943Sdim 228193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 229193323Sed assert(5 < Log2InitSize && Log2InitSize < 32 && 230193323Sed "Initial hash table size out of range"); 231193323Sed NumBuckets = 1 << Log2InitSize; 232210299Sed Buckets = AllocateBuckets(NumBuckets); 233210299Sed NumNodes = 0; 234193323Sed} 235296417Sdim 236296417SdimFoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) 237296417Sdim : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { 238296417Sdim Arg.Buckets = nullptr; 239296417Sdim Arg.NumBuckets = 0; 240296417Sdim Arg.NumNodes = 0; 241296417Sdim} 242296417Sdim 243296417SdimFoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { 244296417Sdim free(Buckets); // This may be null if the set is in a moved-from state. 245296417Sdim Buckets = RHS.Buckets; 246296417Sdim NumBuckets = RHS.NumBuckets; 247296417Sdim NumNodes = RHS.NumNodes; 248296417Sdim RHS.Buckets = nullptr; 249296417Sdim RHS.NumBuckets = 0; 250296417Sdim RHS.NumNodes = 0; 251296417Sdim return *this; 252296417Sdim} 253296417Sdim 254193323SedFoldingSetImpl::~FoldingSetImpl() { 255210299Sed free(Buckets); 256193323Sed} 257296417Sdim 258193323Sedvoid FoldingSetImpl::clear() { 259193323Sed // Set all but the last bucket to null pointers. 260193323Sed memset(Buckets, 0, NumBuckets*sizeof(void*)); 261193323Sed 262193323Sed // Set the very last bucket to be a non-null "pointer". 263193323Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 264193323Sed 265193323Sed // Reset the node count to zero. 266193323Sed NumNodes = 0; 267193323Sed} 268193323Sed 269193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything. 270193323Sed/// 271193323Sedvoid FoldingSetImpl::GrowHashTable() { 272193323Sed void **OldBuckets = Buckets; 273193323Sed unsigned OldNumBuckets = NumBuckets; 274193323Sed NumBuckets <<= 1; 275193323Sed 276193323Sed // Clear out new buckets. 277210299Sed Buckets = AllocateBuckets(NumBuckets); 278210299Sed NumNodes = 0; 279193323Sed 280193323Sed // Walk the old buckets, rehashing nodes into their new place. 281212904Sdim FoldingSetNodeID TempID; 282193323Sed for (unsigned i = 0; i != OldNumBuckets; ++i) { 283193323Sed void *Probe = OldBuckets[i]; 284193323Sed if (!Probe) continue; 285193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 286193323Sed // Figure out the next link, remove NodeInBucket from the old link. 287193323Sed Probe = NodeInBucket->getNextInBucket(); 288276479Sdim NodeInBucket->SetNextInBucket(nullptr); 289193323Sed 290193323Sed // Insert the node into the new bucket, after recomputing the hash. 291212904Sdim InsertNode(NodeInBucket, 292212904Sdim GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), 293212904Sdim Buckets, NumBuckets)); 294212904Sdim TempID.clear(); 295193323Sed } 296193323Sed } 297193323Sed 298210299Sed free(OldBuckets); 299193323Sed} 300193323Sed 301193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 302193323Sed/// return it. If not, return the insertion token that will make insertion 303193323Sed/// faster. 304193323SedFoldingSetImpl::Node 305193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 306193323Sed void *&InsertPos) { 307234353Sdim unsigned IDHash = ID.ComputeHash(); 308234353Sdim void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); 309193323Sed void *Probe = *Bucket; 310193323Sed 311276479Sdim InsertPos = nullptr; 312193323Sed 313212904Sdim FoldingSetNodeID TempID; 314193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 315234353Sdim if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) 316193323Sed return NodeInBucket; 317212904Sdim TempID.clear(); 318193323Sed 319193323Sed Probe = NodeInBucket->getNextInBucket(); 320193323Sed } 321193323Sed 322193323Sed // Didn't find the node, return null with the bucket as the InsertPos. 323193323Sed InsertPos = Bucket; 324276479Sdim return nullptr; 325193323Sed} 326193323Sed 327193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it 328193323Sed/// is not already in the map. InsertPos must be obtained from 329193323Sed/// FindNodeOrInsertPos. 330193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 331276479Sdim assert(!N->getNextInBucket()); 332193323Sed // Do we need to grow the hashtable? 333193323Sed if (NumNodes+1 > NumBuckets*2) { 334193323Sed GrowHashTable(); 335212904Sdim FoldingSetNodeID TempID; 336212904Sdim InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); 337193323Sed } 338193323Sed 339193323Sed ++NumNodes; 340193323Sed 341193323Sed /// The insert position is actually a bucket pointer. 342193323Sed void **Bucket = static_cast<void**>(InsertPos); 343193323Sed 344193323Sed void *Next = *Bucket; 345193323Sed 346193323Sed // If this is the first insertion into this bucket, its next pointer will be 347193323Sed // null. Pretend as if it pointed to itself, setting the low bit to indicate 348193323Sed // that it is a pointer to the bucket. 349276479Sdim if (!Next) 350193323Sed Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 351193323Sed 352193323Sed // Set the node's next pointer, and make the bucket point to the node. 353193323Sed N->SetNextInBucket(Next); 354193323Sed *Bucket = N; 355193323Sed} 356193323Sed 357193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was 358193323Sed/// removed or false if the node was not in the folding set. 359193323Sedbool FoldingSetImpl::RemoveNode(Node *N) { 360193323Sed // Because each bucket is a circular list, we don't need to compute N's hash 361193323Sed // to remove it. 362193323Sed void *Ptr = N->getNextInBucket(); 363276479Sdim if (!Ptr) return false; // Not in folding set. 364193323Sed 365193323Sed --NumNodes; 366276479Sdim N->SetNextInBucket(nullptr); 367193323Sed 368193323Sed // Remember what N originally pointed to, either a bucket or another node. 369193323Sed void *NodeNextPtr = Ptr; 370193323Sed 371193323Sed // Chase around the list until we find the node (or bucket) which points to N. 372193323Sed while (true) { 373193323Sed if (Node *NodeInBucket = GetNextPtr(Ptr)) { 374193323Sed // Advance pointer. 375193323Sed Ptr = NodeInBucket->getNextInBucket(); 376193323Sed 377193323Sed // We found a node that points to N, change it to point to N's next node, 378193323Sed // removing N from the list. 379193323Sed if (Ptr == N) { 380193323Sed NodeInBucket->SetNextInBucket(NodeNextPtr); 381193323Sed return true; 382193323Sed } 383193323Sed } else { 384193323Sed void **Bucket = GetBucketPtr(Ptr); 385193323Sed Ptr = *Bucket; 386193323Sed 387193323Sed // If we found that the bucket points to N, update the bucket to point to 388193323Sed // whatever is next. 389193323Sed if (Ptr == N) { 390193323Sed *Bucket = NodeNextPtr; 391193323Sed return true; 392193323Sed } 393193323Sed } 394193323Sed } 395193323Sed} 396193323Sed 397193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly 398193323Sed/// equal to the specified node, return it. Otherwise, insert 'N' and it 399193323Sed/// instead. 400193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 401193323Sed FoldingSetNodeID ID; 402212904Sdim GetNodeProfile(N, ID); 403193323Sed void *IP; 404193323Sed if (Node *E = FindNodeOrInsertPos(ID, IP)) 405193323Sed return E; 406193323Sed InsertNode(N, IP); 407193323Sed return N; 408193323Sed} 409193323Sed 410193323Sed//===----------------------------------------------------------------------===// 411193323Sed// FoldingSetIteratorImpl Implementation 412193323Sed 413193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 414193323Sed // Skip to the first non-null non-self-cycle bucket. 415193323Sed while (*Bucket != reinterpret_cast<void*>(-1) && 416276479Sdim (!*Bucket || !GetNextPtr(*Bucket))) 417193323Sed ++Bucket; 418193323Sed 419193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 420193323Sed} 421193323Sed 422193323Sedvoid FoldingSetIteratorImpl::advance() { 423193323Sed // If there is another link within this bucket, go to it. 424193323Sed void *Probe = NodePtr->getNextInBucket(); 425193323Sed 426193323Sed if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 427193323Sed NodePtr = NextNodeInBucket; 428193323Sed else { 429193323Sed // Otherwise, this is the last link in this bucket. 430193323Sed void **Bucket = GetBucketPtr(Probe); 431193323Sed 432193323Sed // Skip to the next non-null non-self-cycle bucket. 433193323Sed do { 434193323Sed ++Bucket; 435193323Sed } while (*Bucket != reinterpret_cast<void*>(-1) && 436276479Sdim (!*Bucket || !GetNextPtr(*Bucket))); 437193323Sed 438193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 439193323Sed } 440193323Sed} 441193323Sed 442193323Sed//===----------------------------------------------------------------------===// 443193323Sed// FoldingSetBucketIteratorImpl Implementation 444193323Sed 445193323SedFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 446276479Sdim Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; 447193323Sed} 448