FoldingSet.cpp revision 198090
1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements a hash set that can be used to remove duplication of 11193323Sed// nodes in a graph. This code was originally created by Chris Lattner for use 12193323Sed// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code 13193323Sed// set. 14193323Sed// 15193323Sed//===----------------------------------------------------------------------===// 16193323Sed 17193323Sed#include "llvm/ADT/FoldingSet.h" 18198090Srdivacky#include "llvm/Support/ErrorHandling.h" 19193323Sed#include "llvm/Support/MathExtras.h" 20193323Sed#include <cassert> 21193323Sed#include <cstring> 22193323Sedusing namespace llvm; 23193323Sed 24193323Sed//===----------------------------------------------------------------------===// 25193323Sed// FoldingSetNodeID Implementation 26193323Sed 27193323Sed/// Add* - Add various data types to Bit data. 28193323Sed/// 29193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) { 30193323Sed // Note: this adds pointers to the hash using sizes and endianness that 31193323Sed // depend on the host. It doesn't matter however, because hashing on 32193323Sed // pointer values in inherently unstable. Nothing should depend on the 33193323Sed // ordering of nodes in the folding set. 34193323Sed intptr_t PtrI = (intptr_t)Ptr; 35193323Sed Bits.push_back(unsigned(PtrI)); 36193323Sed if (sizeof(intptr_t) > sizeof(unsigned)) 37193323Sed Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); 38193323Sed} 39193323Sedvoid FoldingSetNodeID::AddInteger(signed I) { 40193323Sed Bits.push_back(I); 41193323Sed} 42193323Sedvoid FoldingSetNodeID::AddInteger(unsigned I) { 43193323Sed Bits.push_back(I); 44193323Sed} 45193323Sedvoid FoldingSetNodeID::AddInteger(long I) { 46193323Sed AddInteger((unsigned long)I); 47193323Sed} 48193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) { 49193323Sed if (sizeof(long) == sizeof(int)) 50193323Sed AddInteger(unsigned(I)); 51193323Sed else if (sizeof(long) == sizeof(long long)) { 52193323Sed AddInteger((unsigned long long)I); 53193323Sed } else { 54198090Srdivacky llvm_unreachable("unexpected sizeof(long)"); 55193323Sed } 56193323Sed} 57193323Sedvoid FoldingSetNodeID::AddInteger(long long I) { 58193323Sed AddInteger((unsigned long long)I); 59193323Sed} 60193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) { 61193323Sed AddInteger(unsigned(I)); 62193323Sed if ((uint64_t)(int)I != I) 63193323Sed Bits.push_back(unsigned(I >> 32)); 64193323Sed} 65193323Sed 66198090Srdivackyvoid FoldingSetNodeID::AddString(StringRef String) { 67198090Srdivacky unsigned Size = String.size(); 68193323Sed Bits.push_back(Size); 69193323Sed if (!Size) return; 70193323Sed 71193323Sed unsigned Units = Size / 4; 72193323Sed unsigned Pos = 0; 73198090Srdivacky const unsigned *Base = (const unsigned*) String.data(); 74193323Sed 75193323Sed // If the string is aligned do a bulk transfer. 76193323Sed if (!((intptr_t)Base & 3)) { 77193323Sed Bits.append(Base, Base + Units); 78193323Sed Pos = (Units + 1) * 4; 79193323Sed } else { 80193323Sed // Otherwise do it the hard way. 81193323Sed for (Pos += 4; Pos <= Size; Pos += 4) { 82193323Sed unsigned V = ((unsigned char)String[Pos - 4] << 24) | 83193323Sed ((unsigned char)String[Pos - 3] << 16) | 84193323Sed ((unsigned char)String[Pos - 2] << 8) | 85193323Sed (unsigned char)String[Pos - 1]; 86193323Sed Bits.push_back(V); 87193323Sed } 88193323Sed } 89193323Sed 90193323Sed // With the leftover bits. 91193323Sed unsigned V = 0; 92193323Sed // Pos will have overshot size by 4 - #bytes left over. 93193323Sed switch (Pos - Size) { 94193323Sed case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 95193323Sed case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 96193323Sed case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 97193323Sed default: return; // Nothing left. 98193323Sed } 99193323Sed 100193323Sed Bits.push_back(V); 101193323Sed} 102193323Sed 103193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 104193323Sed/// lookup the node in the FoldingSetImpl. 105193323Sedunsigned FoldingSetNodeID::ComputeHash() const { 106193323Sed // This is adapted from SuperFastHash by Paul Hsieh. 107193323Sed unsigned Hash = static_cast<unsigned>(Bits.size()); 108193323Sed for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { 109193323Sed unsigned Data = *BP; 110193323Sed Hash += Data & 0xFFFF; 111193323Sed unsigned Tmp = ((Data >> 16) << 11) ^ Hash; 112193323Sed Hash = (Hash << 16) ^ Tmp; 113193323Sed Hash += Hash >> 11; 114193323Sed } 115193323Sed 116193323Sed // Force "avalanching" of final 127 bits. 117193323Sed Hash ^= Hash << 3; 118193323Sed Hash += Hash >> 5; 119193323Sed Hash ^= Hash << 4; 120193323Sed Hash += Hash >> 17; 121193323Sed Hash ^= Hash << 25; 122193323Sed Hash += Hash >> 6; 123193323Sed return Hash; 124193323Sed} 125193323Sed 126193323Sed/// operator== - Used to compare two nodes to each other. 127193323Sed/// 128193323Sedbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ 129193323Sed if (Bits.size() != RHS.Bits.size()) return false; 130193323Sed return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; 131193323Sed} 132193323Sed 133193323Sed 134193323Sed//===----------------------------------------------------------------------===// 135193323Sed/// Helper functions for FoldingSetImpl. 136193323Sed 137193323Sed/// GetNextPtr - In order to save space, each bucket is a 138193323Sed/// singly-linked-list. In order to make deletion more efficient, we make 139193323Sed/// the list circular, so we can delete a node without computing its hash. 140193323Sed/// The problem with this is that the start of the hash buckets are not 141193323Sed/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 142193323Sed/// use GetBucketPtr when this happens. 143193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 144193323Sed // The low bit is set if this is the pointer back to the bucket. 145193323Sed if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 146193323Sed return 0; 147193323Sed 148193323Sed return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 149193323Sed} 150193323Sed 151193323Sed 152193323Sed/// testing. 153193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) { 154193323Sed intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 155193323Sed assert((Ptr & 1) && "Not a bucket pointer"); 156193323Sed return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 157193323Sed} 158193323Sed 159193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for 160193323Sed/// the specified ID. 161193323Sedstatic void **GetBucketFor(const FoldingSetNodeID &ID, 162193323Sed void **Buckets, unsigned NumBuckets) { 163193323Sed // NumBuckets is always a power of 2. 164193323Sed unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); 165193323Sed return Buckets + BucketNum; 166193323Sed} 167193323Sed 168193323Sed//===----------------------------------------------------------------------===// 169193323Sed// FoldingSetImpl Implementation 170193323Sed 171193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 172193323Sed assert(5 < Log2InitSize && Log2InitSize < 32 && 173193323Sed "Initial hash table size out of range"); 174193323Sed NumBuckets = 1 << Log2InitSize; 175193323Sed Buckets = new void*[NumBuckets+1]; 176193323Sed clear(); 177193323Sed} 178193323SedFoldingSetImpl::~FoldingSetImpl() { 179193323Sed delete [] Buckets; 180193323Sed} 181193323Sedvoid FoldingSetImpl::clear() { 182193323Sed // Set all but the last bucket to null pointers. 183193323Sed memset(Buckets, 0, NumBuckets*sizeof(void*)); 184193323Sed 185193323Sed // Set the very last bucket to be a non-null "pointer". 186193323Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 187193323Sed 188193323Sed // Reset the node count to zero. 189193323Sed NumNodes = 0; 190193323Sed} 191193323Sed 192193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything. 193193323Sed/// 194193323Sedvoid FoldingSetImpl::GrowHashTable() { 195193323Sed void **OldBuckets = Buckets; 196193323Sed unsigned OldNumBuckets = NumBuckets; 197193323Sed NumBuckets <<= 1; 198193323Sed 199193323Sed // Clear out new buckets. 200193323Sed Buckets = new void*[NumBuckets+1]; 201193323Sed clear(); 202193323Sed 203193323Sed // Walk the old buckets, rehashing nodes into their new place. 204193323Sed FoldingSetNodeID ID; 205193323Sed for (unsigned i = 0; i != OldNumBuckets; ++i) { 206193323Sed void *Probe = OldBuckets[i]; 207193323Sed if (!Probe) continue; 208193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 209193323Sed // Figure out the next link, remove NodeInBucket from the old link. 210193323Sed Probe = NodeInBucket->getNextInBucket(); 211193323Sed NodeInBucket->SetNextInBucket(0); 212193323Sed 213193323Sed // Insert the node into the new bucket, after recomputing the hash. 214193323Sed GetNodeProfile(ID, NodeInBucket); 215193323Sed InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); 216193323Sed ID.clear(); 217193323Sed } 218193323Sed } 219193323Sed 220193323Sed delete[] OldBuckets; 221193323Sed} 222193323Sed 223193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 224193323Sed/// return it. If not, return the insertion token that will make insertion 225193323Sed/// faster. 226193323SedFoldingSetImpl::Node 227193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 228193323Sed void *&InsertPos) { 229193323Sed 230193323Sed void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); 231193323Sed void *Probe = *Bucket; 232193323Sed 233193323Sed InsertPos = 0; 234193323Sed 235193323Sed FoldingSetNodeID OtherID; 236193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 237193323Sed GetNodeProfile(OtherID, NodeInBucket); 238193323Sed if (OtherID == ID) 239193323Sed return NodeInBucket; 240193323Sed 241193323Sed Probe = NodeInBucket->getNextInBucket(); 242193323Sed OtherID.clear(); 243193323Sed } 244193323Sed 245193323Sed // Didn't find the node, return null with the bucket as the InsertPos. 246193323Sed InsertPos = Bucket; 247193323Sed return 0; 248193323Sed} 249193323Sed 250193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it 251193323Sed/// is not already in the map. InsertPos must be obtained from 252193323Sed/// FindNodeOrInsertPos. 253193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 254193323Sed assert(N->getNextInBucket() == 0); 255193323Sed // Do we need to grow the hashtable? 256193323Sed if (NumNodes+1 > NumBuckets*2) { 257193323Sed GrowHashTable(); 258193323Sed FoldingSetNodeID ID; 259193323Sed GetNodeProfile(ID, N); 260193323Sed InsertPos = GetBucketFor(ID, Buckets, NumBuckets); 261193323Sed } 262193323Sed 263193323Sed ++NumNodes; 264193323Sed 265193323Sed /// The insert position is actually a bucket pointer. 266193323Sed void **Bucket = static_cast<void**>(InsertPos); 267193323Sed 268193323Sed void *Next = *Bucket; 269193323Sed 270193323Sed // If this is the first insertion into this bucket, its next pointer will be 271193323Sed // null. Pretend as if it pointed to itself, setting the low bit to indicate 272193323Sed // that it is a pointer to the bucket. 273193323Sed if (Next == 0) 274193323Sed Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 275193323Sed 276193323Sed // Set the node's next pointer, and make the bucket point to the node. 277193323Sed N->SetNextInBucket(Next); 278193323Sed *Bucket = N; 279193323Sed} 280193323Sed 281193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was 282193323Sed/// removed or false if the node was not in the folding set. 283193323Sedbool FoldingSetImpl::RemoveNode(Node *N) { 284193323Sed // Because each bucket is a circular list, we don't need to compute N's hash 285193323Sed // to remove it. 286193323Sed void *Ptr = N->getNextInBucket(); 287193323Sed if (Ptr == 0) return false; // Not in folding set. 288193323Sed 289193323Sed --NumNodes; 290193323Sed N->SetNextInBucket(0); 291193323Sed 292193323Sed // Remember what N originally pointed to, either a bucket or another node. 293193323Sed void *NodeNextPtr = Ptr; 294193323Sed 295193323Sed // Chase around the list until we find the node (or bucket) which points to N. 296193323Sed while (true) { 297193323Sed if (Node *NodeInBucket = GetNextPtr(Ptr)) { 298193323Sed // Advance pointer. 299193323Sed Ptr = NodeInBucket->getNextInBucket(); 300193323Sed 301193323Sed // We found a node that points to N, change it to point to N's next node, 302193323Sed // removing N from the list. 303193323Sed if (Ptr == N) { 304193323Sed NodeInBucket->SetNextInBucket(NodeNextPtr); 305193323Sed return true; 306193323Sed } 307193323Sed } else { 308193323Sed void **Bucket = GetBucketPtr(Ptr); 309193323Sed Ptr = *Bucket; 310193323Sed 311193323Sed // If we found that the bucket points to N, update the bucket to point to 312193323Sed // whatever is next. 313193323Sed if (Ptr == N) { 314193323Sed *Bucket = NodeNextPtr; 315193323Sed return true; 316193323Sed } 317193323Sed } 318193323Sed } 319193323Sed} 320193323Sed 321193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly 322193323Sed/// equal to the specified node, return it. Otherwise, insert 'N' and it 323193323Sed/// instead. 324193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 325193323Sed FoldingSetNodeID ID; 326193323Sed GetNodeProfile(ID, N); 327193323Sed void *IP; 328193323Sed if (Node *E = FindNodeOrInsertPos(ID, IP)) 329193323Sed return E; 330193323Sed InsertNode(N, IP); 331193323Sed return N; 332193323Sed} 333193323Sed 334193323Sed//===----------------------------------------------------------------------===// 335193323Sed// FoldingSetIteratorImpl Implementation 336193323Sed 337193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 338193323Sed // Skip to the first non-null non-self-cycle bucket. 339193323Sed while (*Bucket != reinterpret_cast<void*>(-1) && 340193323Sed (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) 341193323Sed ++Bucket; 342193323Sed 343193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 344193323Sed} 345193323Sed 346193323Sedvoid FoldingSetIteratorImpl::advance() { 347193323Sed // If there is another link within this bucket, go to it. 348193323Sed void *Probe = NodePtr->getNextInBucket(); 349193323Sed 350193323Sed if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 351193323Sed NodePtr = NextNodeInBucket; 352193323Sed else { 353193323Sed // Otherwise, this is the last link in this bucket. 354193323Sed void **Bucket = GetBucketPtr(Probe); 355193323Sed 356193323Sed // Skip to the next non-null non-self-cycle bucket. 357193323Sed do { 358193323Sed ++Bucket; 359193323Sed } while (*Bucket != reinterpret_cast<void*>(-1) && 360193323Sed (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); 361193323Sed 362193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 363193323Sed } 364193323Sed} 365193323Sed 366193323Sed//===----------------------------------------------------------------------===// 367193323Sed// FoldingSetBucketIteratorImpl Implementation 368193323Sed 369193323SedFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 370193323Sed Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; 371193323Sed} 372