FoldingSet.cpp revision 212904
1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements a hash set that can be used to remove duplication of 11193323Sed// nodes in a graph. This code was originally created by Chris Lattner for use 12193323Sed// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code 13193323Sed// set. 14193323Sed// 15193323Sed//===----------------------------------------------------------------------===// 16193323Sed 17193323Sed#include "llvm/ADT/FoldingSet.h" 18205407Srdivacky#include "llvm/Support/Allocator.h" 19198090Srdivacky#include "llvm/Support/ErrorHandling.h" 20193323Sed#include "llvm/Support/MathExtras.h" 21193323Sed#include <cassert> 22193323Sed#include <cstring> 23193323Sedusing namespace llvm; 24193323Sed 25193323Sed//===----------------------------------------------------------------------===// 26193323Sed// FoldingSetNodeIDRef Implementation 27193323Sed 28193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 29193323Sed/// used to lookup the node in the FoldingSetImpl. 30193323Sedunsigned FoldingSetNodeIDRef::ComputeHash() const { 31193323Sed // This is adapted from SuperFastHash by Paul Hsieh. 32193323Sed unsigned Hash = static_cast<unsigned>(Size); 33193323Sed for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) { 34193323Sed unsigned Data = *BP; 35193323Sed Hash += Data & 0xFFFF; 36193323Sed unsigned Tmp = ((Data >> 16) << 11) ^ Hash; 37193323Sed Hash = (Hash << 16) ^ Tmp; 38193323Sed Hash += Hash >> 11; 39193323Sed } 40193323Sed 41193323Sed // Force "avalanching" of final 127 bits. 42193323Sed Hash ^= Hash << 3; 43193323Sed Hash += Hash >> 5; 44193323Sed Hash ^= Hash << 4; 45193323Sed Hash += Hash >> 17; 46193323Sed Hash ^= Hash << 25; 47193323Sed Hash += Hash >> 6; 48193323Sed return Hash; 49193323Sed} 50193323Sed 51193323Sedbool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 52193323Sed if (Size != RHS.Size) return false; 53193323Sed return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 54193323Sed} 55198090Srdivacky 56193323Sed//===----------------------------------------------------------------------===// 57193323Sed// FoldingSetNodeID Implementation 58193323Sed 59193323Sed/// Add* - Add various data types to Bit data. 60193323Sed/// 61193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) { 62193323Sed // Note: this adds pointers to the hash using sizes and endianness that 63193323Sed // depend on the host. It doesn't matter however, because hashing on 64193323Sed // pointer values in inherently unstable. Nothing should depend on the 65193323Sed // ordering of nodes in the folding set. 66193323Sed intptr_t PtrI = (intptr_t)Ptr; 67198090Srdivacky Bits.push_back(unsigned(PtrI)); 68198090Srdivacky if (sizeof(intptr_t) > sizeof(unsigned)) 69193323Sed Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); 70193323Sed} 71193323Sedvoid FoldingSetNodeID::AddInteger(signed I) { 72193323Sed Bits.push_back(I); 73193323Sed} 74198090Srdivackyvoid FoldingSetNodeID::AddInteger(unsigned I) { 75193323Sed Bits.push_back(I); 76193323Sed} 77193323Sedvoid FoldingSetNodeID::AddInteger(long I) { 78193323Sed AddInteger((unsigned long)I); 79193323Sed} 80193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) { 81193323Sed if (sizeof(long) == sizeof(int)) 82193323Sed AddInteger(unsigned(I)); 83193323Sed else if (sizeof(long) == sizeof(long long)) { 84193323Sed AddInteger((unsigned long long)I); 85193323Sed } else { 86193323Sed llvm_unreachable("unexpected sizeof(long)"); 87193323Sed } 88193323Sed} 89193323Sedvoid FoldingSetNodeID::AddInteger(long long I) { 90193323Sed AddInteger((unsigned long long)I); 91193323Sed} 92193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) { 93193323Sed AddInteger(unsigned(I)); 94193323Sed if ((uint64_t)(int)I != I) 95193323Sed Bits.push_back(unsigned(I >> 32)); 96193323Sed} 97193323Sed 98193323Sedvoid FoldingSetNodeID::AddString(StringRef String) { 99193323Sed unsigned Size = String.size(); 100193323Sed Bits.push_back(Size); 101193323Sed if (!Size) return; 102193323Sed 103193323Sed unsigned Units = Size / 4; 104193323Sed unsigned Pos = 0; 105193323Sed const unsigned *Base = (const unsigned*) String.data(); 106193323Sed 107193323Sed // If the string is aligned do a bulk transfer. 108193323Sed if (!((intptr_t)Base & 3)) { 109193323Sed Bits.append(Base, Base + Units); 110193323Sed Pos = (Units + 1) * 4; 111193323Sed } else { 112193323Sed // Otherwise do it the hard way. 113193323Sed for (Pos += 4; Pos <= Size; Pos += 4) { 114193323Sed unsigned V = ((unsigned char)String[Pos - 4] << 24) | 115193323Sed ((unsigned char)String[Pos - 3] << 16) | 116193323Sed ((unsigned char)String[Pos - 2] << 8) | 117193323Sed (unsigned char)String[Pos - 1]; 118193323Sed Bits.push_back(V); 119193323Sed } 120193323Sed } 121193323Sed 122193323Sed // With the leftover bits. 123193323Sed unsigned V = 0; 124193323Sed // Pos will have overshot size by 4 - #bytes left over. 125193323Sed switch (Pos - Size) { 126193323Sed case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 127193323Sed case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 128193323Sed case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 129193323Sed default: return; // Nothing left. 130193323Sed } 131193323Sed 132193323Sed Bits.push_back(V); 133193323Sed} 134205407Srdivacky 135205407Srdivacky/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 136205407Srdivacky/// lookup the node in the FoldingSetImpl. 137205407Srdivackyunsigned FoldingSetNodeID::ComputeHash() const { 138205407Srdivacky return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 139205407Srdivacky} 140205407Srdivacky 141205407Srdivacky/// operator== - Used to compare two nodes to each other. 142205407Srdivacky/// 143193323Sedbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ 144193323Sed return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 145193323Sed} 146193323Sed 147193323Sed/// operator== - Used to compare two nodes to each other. 148193323Sed/// 149193323Sedbool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 150193323Sed return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 151193323Sed} 152193323Sed 153193323Sed/// Intern - Copy this node's data to a memory region allocated from the 154193323Sed/// given allocator and return a FoldingSetNodeIDRef describing the 155193323Sed/// interned data. 156193323SedFoldingSetNodeIDRef 157193323SedFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 158193323Sed unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 159193323Sed std::uninitialized_copy(Bits.begin(), Bits.end(), New); 160193323Sed return FoldingSetNodeIDRef(New, Bits.size()); 161193323Sed} 162193323Sed 163193323Sed//===----------------------------------------------------------------------===// 164193323Sed/// Helper functions for FoldingSetImpl. 165193323Sed 166193323Sed/// GetNextPtr - In order to save space, each bucket is a 167193323Sed/// singly-linked-list. In order to make deletion more efficient, we make 168193323Sed/// the list circular, so we can delete a node without computing its hash. 169193323Sed/// The problem with this is that the start of the hash buckets are not 170193323Sed/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 171193323Sed/// use GetBucketPtr when this happens. 172193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 173193323Sed // The low bit is set if this is the pointer back to the bucket. 174193323Sed if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 175193323Sed return 0; 176193323Sed 177193323Sed return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 178193323Sed} 179193323Sed 180193323Sed 181193323Sed/// testing. 182193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) { 183193323Sed intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 184193323Sed assert((Ptr & 1) && "Not a bucket pointer"); 185193323Sed return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 186193323Sed} 187193323Sed 188193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for 189193323Sed/// the specified ID. 190193323Sedstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 191193323Sed // NumBuckets is always a power of 2. 192193323Sed unsigned BucketNum = Hash & (NumBuckets-1); 193193323Sed return Buckets + BucketNum; 194193323Sed} 195193323Sed 196193323Sed/// AllocateBuckets - Allocated initialized bucket memory. 197193323Sedstatic void **AllocateBuckets(unsigned NumBuckets) { 198193323Sed void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); 199193323Sed // Set the very last bucket to be a non-null "pointer". 200193323Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 201193323Sed return Buckets; 202193323Sed} 203193323Sed 204193323Sed//===----------------------------------------------------------------------===// 205193323Sed// FoldingSetImpl Implementation 206193323Sed 207193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 208193323Sed assert(5 < Log2InitSize && Log2InitSize < 32 && 209193323Sed "Initial hash table size out of range"); 210193323Sed NumBuckets = 1 << Log2InitSize; 211193323Sed Buckets = AllocateBuckets(NumBuckets); 212193323Sed NumNodes = 0; 213193323Sed} 214193323SedFoldingSetImpl::~FoldingSetImpl() { 215193323Sed free(Buckets); 216193323Sed} 217193323Sedvoid FoldingSetImpl::clear() { 218193323Sed // Set all but the last bucket to null pointers. 219193323Sed memset(Buckets, 0, NumBuckets*sizeof(void*)); 220193323Sed 221193323Sed // Set the very last bucket to be a non-null "pointer". 222193323Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 223193323Sed 224193323Sed // Reset the node count to zero. 225193323Sed NumNodes = 0; 226193323Sed} 227193323Sed 228193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything. 229193323Sed/// 230193323Sedvoid FoldingSetImpl::GrowHashTable() { 231193323Sed void **OldBuckets = Buckets; 232193323Sed unsigned OldNumBuckets = NumBuckets; 233193323Sed NumBuckets <<= 1; 234193323Sed 235193323Sed // Clear out new buckets. 236193323Sed Buckets = AllocateBuckets(NumBuckets); 237193323Sed NumNodes = 0; 238193323Sed 239193323Sed // Walk the old buckets, rehashing nodes into their new place. 240193323Sed FoldingSetNodeID TempID; 241193323Sed for (unsigned i = 0; i != OldNumBuckets; ++i) { 242193323Sed void *Probe = OldBuckets[i]; 243193323Sed if (!Probe) continue; 244193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 245193323Sed // Figure out the next link, remove NodeInBucket from the old link. 246193323Sed Probe = NodeInBucket->getNextInBucket(); 247193323Sed NodeInBucket->SetNextInBucket(0); 248193323Sed 249193323Sed // Insert the node into the new bucket, after recomputing the hash. 250193323Sed InsertNode(NodeInBucket, 251193323Sed GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), 252193323Sed Buckets, NumBuckets)); 253193323Sed TempID.clear(); 254193323Sed } 255193323Sed } 256193323Sed 257193323Sed free(OldBuckets); 258193323Sed} 259193323Sed 260193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 261193323Sed/// return it. If not, return the insertion token that will make insertion 262193323Sed/// faster. 263193323SedFoldingSetImpl::Node 264193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 265193323Sed void *&InsertPos) { 266193323Sed 267193323Sed void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); 268193323Sed void *Probe = *Bucket; 269193323Sed 270193323Sed InsertPos = 0; 271193323Sed 272193323Sed FoldingSetNodeID TempID; 273193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 274193323Sed if (NodeEquals(NodeInBucket, ID, TempID)) 275193323Sed return NodeInBucket; 276193323Sed TempID.clear(); 277193323Sed 278193323Sed Probe = NodeInBucket->getNextInBucket(); 279193323Sed } 280193323Sed 281193323Sed // Didn't find the node, return null with the bucket as the InsertPos. 282193323Sed InsertPos = Bucket; 283193323Sed return 0; 284193323Sed} 285193323Sed 286193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it 287193323Sed/// is not already in the map. InsertPos must be obtained from 288193323Sed/// FindNodeOrInsertPos. 289193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 290193323Sed assert(N->getNextInBucket() == 0); 291193323Sed // Do we need to grow the hashtable? 292193323Sed if (NumNodes+1 > NumBuckets*2) { 293193323Sed GrowHashTable(); 294193323Sed FoldingSetNodeID TempID; 295193323Sed InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); 296193323Sed } 297193323Sed 298193323Sed ++NumNodes; 299193323Sed 300193323Sed /// The insert position is actually a bucket pointer. 301193323Sed void **Bucket = static_cast<void**>(InsertPos); 302193323Sed 303193323Sed void *Next = *Bucket; 304193323Sed 305193323Sed // If this is the first insertion into this bucket, its next pointer will be 306193323Sed // null. Pretend as if it pointed to itself, setting the low bit to indicate 307193323Sed // that it is a pointer to the bucket. 308193323Sed if (Next == 0) 309193323Sed Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 310193323Sed 311193323Sed // Set the node's next pointer, and make the bucket point to the node. 312193323Sed N->SetNextInBucket(Next); 313193323Sed *Bucket = N; 314193323Sed} 315193323Sed 316193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was 317193323Sed/// removed or false if the node was not in the folding set. 318193323Sedbool FoldingSetImpl::RemoveNode(Node *N) { 319193323Sed // Because each bucket is a circular list, we don't need to compute N's hash 320193323Sed // to remove it. 321193323Sed void *Ptr = N->getNextInBucket(); 322193323Sed if (Ptr == 0) return false; // Not in folding set. 323193323Sed 324193323Sed --NumNodes; 325193323Sed N->SetNextInBucket(0); 326193323Sed 327193323Sed // Remember what N originally pointed to, either a bucket or another node. 328193323Sed void *NodeNextPtr = Ptr; 329193323Sed 330193323Sed // Chase around the list until we find the node (or bucket) which points to N. 331193323Sed while (true) { 332193323Sed if (Node *NodeInBucket = GetNextPtr(Ptr)) { 333193323Sed // Advance pointer. 334193323Sed Ptr = NodeInBucket->getNextInBucket(); 335193323Sed 336193323Sed // We found a node that points to N, change it to point to N's next node, 337193323Sed // removing N from the list. 338193323Sed if (Ptr == N) { 339193323Sed NodeInBucket->SetNextInBucket(NodeNextPtr); 340193323Sed return true; 341193323Sed } 342193323Sed } else { 343193323Sed void **Bucket = GetBucketPtr(Ptr); 344193323Sed Ptr = *Bucket; 345193323Sed 346193323Sed // If we found that the bucket points to N, update the bucket to point to 347193323Sed // whatever is next. 348193323Sed if (Ptr == N) { 349193323Sed *Bucket = NodeNextPtr; 350193323Sed return true; 351193323Sed } 352193323Sed } 353193323Sed } 354193323Sed} 355193323Sed 356193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly 357193323Sed/// equal to the specified node, return it. Otherwise, insert 'N' and it 358193323Sed/// instead. 359193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 360193323Sed FoldingSetNodeID ID; 361193323Sed GetNodeProfile(N, ID); 362193323Sed void *IP; 363193323Sed if (Node *E = FindNodeOrInsertPos(ID, IP)) 364193323Sed return E; 365193323Sed InsertNode(N, IP); 366193323Sed return N; 367193323Sed} 368193323Sed 369193323Sed//===----------------------------------------------------------------------===// 370193323Sed// FoldingSetIteratorImpl Implementation 371193323Sed 372193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 373193323Sed // Skip to the first non-null non-self-cycle bucket. 374193323Sed while (*Bucket != reinterpret_cast<void*>(-1) && 375193323Sed (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) 376193323Sed ++Bucket; 377193323Sed 378193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 379193323Sed} 380193323Sed 381193323Sedvoid FoldingSetIteratorImpl::advance() { 382 // If there is another link within this bucket, go to it. 383 void *Probe = NodePtr->getNextInBucket(); 384 385 if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 386 NodePtr = NextNodeInBucket; 387 else { 388 // Otherwise, this is the last link in this bucket. 389 void **Bucket = GetBucketPtr(Probe); 390 391 // Skip to the next non-null non-self-cycle bucket. 392 do { 393 ++Bucket; 394 } while (*Bucket != reinterpret_cast<void*>(-1) && 395 (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); 396 397 NodePtr = static_cast<FoldingSetNode*>(*Bucket); 398 } 399} 400 401//===----------------------------------------------------------------------===// 402// FoldingSetBucketIteratorImpl Implementation 403 404FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 405 Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; 406} 407