FoldingSet.cpp revision 226633
1189251Ssam//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2189251Ssam// 3189251Ssam// The LLVM Compiler Infrastructure 4189251Ssam// 5252726Srpaulo// This file is distributed under the University of Illinois Open Source 6252726Srpaulo// License. See LICENSE.TXT for details. 7189251Ssam// 8189251Ssam//===----------------------------------------------------------------------===// 9189251Ssam// 10189251Ssam// This file implements a hash set that can be used to remove duplication of 11189251Ssam// nodes in a graph. This code was originally created by Chris Lattner for use 12189251Ssam// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code 13189251Ssam// set. 14189251Ssam// 15189251Ssam//===----------------------------------------------------------------------===// 16189251Ssam 17189251Ssam#include "llvm/ADT/FoldingSet.h" 18214734Srpaulo#include "llvm/Support/Allocator.h" 19189251Ssam#include "llvm/Support/ErrorHandling.h" 20189251Ssam#include "llvm/Support/MathExtras.h" 21214734Srpaulo#include "llvm/Support/Host.h" 22214734Srpaulo#include <cassert> 23189251Ssam#include <cstring> 24189251Ssamusing namespace llvm; 25189251Ssam 26189251Ssam//===----------------------------------------------------------------------===// 27189251Ssam// FoldingSetNodeIDRef Implementation 28189251Ssam 29189251Ssam/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 30189251Ssam/// used to lookup the node in the FoldingSetImpl. 31189251Ssamunsigned FoldingSetNodeIDRef::ComputeHash() const { 32189251Ssam // This is adapted from SuperFastHash by Paul Hsieh. 33189251Ssam unsigned Hash = static_cast<unsigned>(Size); 34189251Ssam for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) { 35189251Ssam unsigned Data = *BP; 36189251Ssam Hash += Data & 0xFFFF; 37189251Ssam unsigned Tmp = ((Data >> 16) << 11) ^ Hash; 38189251Ssam Hash = (Hash << 16) ^ Tmp; 39189251Ssam Hash += Hash >> 11; 40189251Ssam } 41189251Ssam 42189251Ssam // Force "avalanching" of final 127 bits. 43189251Ssam Hash ^= Hash << 3; 44189251Ssam Hash += Hash >> 5; 45189251Ssam Hash ^= Hash << 4; 46189251Ssam Hash += Hash >> 17; 47189251Ssam Hash ^= Hash << 25; 48189251Ssam Hash += Hash >> 6; 49189251Ssam return Hash; 50189251Ssam} 51189251Ssam 52189251Ssambool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 53189251Ssam if (Size != RHS.Size) return false; 54189251Ssam return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 55189251Ssam} 56189251Ssam 57189251Ssam//===----------------------------------------------------------------------===// 58189251Ssam// FoldingSetNodeID Implementation 59189251Ssam 60189251Ssam/// Add* - Add various data types to Bit data. 61189251Ssam/// 62189251Ssamvoid FoldingSetNodeID::AddPointer(const void *Ptr) { 63189251Ssam // Note: this adds pointers to the hash using sizes and endianness that 64189251Ssam // depend on the host. It doesn't matter however, because hashing on 65189251Ssam // pointer values in inherently unstable. Nothing should depend on the 66189251Ssam // ordering of nodes in the folding set. 67189251Ssam Bits.append(reinterpret_cast<unsigned *>(&Ptr), 68189251Ssam reinterpret_cast<unsigned *>(&Ptr+1)); 69189251Ssam} 70189251Ssamvoid FoldingSetNodeID::AddInteger(signed I) { 71189251Ssam Bits.push_back(I); 72189251Ssam} 73189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned I) { 74189251Ssam Bits.push_back(I); 75189251Ssam} 76189251Ssamvoid FoldingSetNodeID::AddInteger(long I) { 77189251Ssam AddInteger((unsigned long)I); 78189251Ssam} 79189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned long I) { 80189251Ssam if (sizeof(long) == sizeof(int)) 81189251Ssam AddInteger(unsigned(I)); 82189251Ssam else if (sizeof(long) == sizeof(long long)) { 83189251Ssam AddInteger((unsigned long long)I); 84189251Ssam } else { 85189251Ssam llvm_unreachable("unexpected sizeof(long)"); 86189251Ssam } 87189251Ssam} 88189251Ssamvoid FoldingSetNodeID::AddInteger(long long I) { 89189251Ssam AddInteger((unsigned long long)I); 90189251Ssam} 91189251Ssamvoid FoldingSetNodeID::AddInteger(unsigned long long I) { 92189251Ssam AddInteger(unsigned(I)); 93189251Ssam if ((uint64_t)(unsigned)I != I) 94189251Ssam Bits.push_back(unsigned(I >> 32)); 95189251Ssam} 96189251Ssam 97189251Ssamvoid FoldingSetNodeID::AddString(StringRef String) { 98214734Srpaulo unsigned Size = String.size(); 99214734Srpaulo Bits.push_back(Size); 100189251Ssam if (!Size) return; 101189251Ssam 102189251Ssam unsigned Units = Size / 4; 103214734Srpaulo unsigned Pos = 0; 104214734Srpaulo const unsigned *Base = (const unsigned*) String.data(); 105214734Srpaulo 106214734Srpaulo // If the string is aligned do a bulk transfer. 107214734Srpaulo if (!((intptr_t)Base & 3)) { 108214734Srpaulo Bits.append(Base, Base + Units); 109214734Srpaulo Pos = (Units + 1) * 4; 110214734Srpaulo } else { 111214734Srpaulo // Otherwise do it the hard way. 112189251Ssam // To be compatible with above bulk transfer, we need to take endianness 113189251Ssam // into account. 114189251Ssam if (sys::isBigEndianHost()) { 115189251Ssam for (Pos += 4; Pos <= Size; Pos += 4) { 116189251Ssam unsigned V = ((unsigned char)String[Pos - 4] << 24) | 117189251Ssam ((unsigned char)String[Pos - 3] << 16) | 118189251Ssam ((unsigned char)String[Pos - 2] << 8) | 119189251Ssam (unsigned char)String[Pos - 1]; 120189251Ssam Bits.push_back(V); 121189251Ssam } 122189251Ssam } else { 123189251Ssam assert(sys::isLittleEndianHost() && "Unexpected host endianness"); 124189251Ssam for (Pos += 4; Pos <= Size; Pos += 4) { 125189251Ssam unsigned V = ((unsigned char)String[Pos - 1] << 24) | 126189251Ssam ((unsigned char)String[Pos - 2] << 16) | 127189251Ssam ((unsigned char)String[Pos - 3] << 8) | 128189251Ssam (unsigned char)String[Pos - 4]; 129189251Ssam Bits.push_back(V); 130189251Ssam } 131189251Ssam } 132189251Ssam } 133189251Ssam 134189251Ssam // With the leftover bits. 135189251Ssam unsigned V = 0; 136189251Ssam // Pos will have overshot size by 4 - #bytes left over. 137189251Ssam // No need to take endianness into account here - this is always executed. 138189251Ssam switch (Pos - Size) { 139189251Ssam case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 140189251Ssam case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 141189251Ssam case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 142189251Ssam default: return; // Nothing left. 143189251Ssam } 144189251Ssam 145189251Ssam Bits.push_back(V); 146189251Ssam} 147189251Ssam 148189251Ssam// AddNodeID - Adds the Bit data of another ID to *this. 149189251Ssamvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { 150189251Ssam Bits.append(ID.Bits.begin(), ID.Bits.end()); 151209158Srpaulo} 152189251Ssam 153189251Ssam/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 154189251Ssam/// lookup the node in the FoldingSetImpl. 155189251Ssamunsigned FoldingSetNodeID::ComputeHash() const { 156209158Srpaulo return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 157189251Ssam} 158189251Ssam 159189251Ssam/// operator== - Used to compare two nodes to each other. 160189251Ssam/// 161189251Ssambool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ 162189251Ssam return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 163189251Ssam} 164189251Ssam 165189251Ssam/// operator== - Used to compare two nodes to each other. 166189251Ssam/// 167189251Ssambool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 168189251Ssam return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 169189251Ssam} 170189251Ssam 171189251Ssam/// Intern - Copy this node's data to a memory region allocated from the 172189251Ssam/// given allocator and return a FoldingSetNodeIDRef describing the 173189251Ssam/// interned data. 174189251SsamFoldingSetNodeIDRef 175189251SsamFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 176189251Ssam unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 177189251Ssam std::uninitialized_copy(Bits.begin(), Bits.end(), New); 178189251Ssam return FoldingSetNodeIDRef(New, Bits.size()); 179189251Ssam} 180189251Ssam 181189251Ssam//===----------------------------------------------------------------------===// 182189251Ssam/// Helper functions for FoldingSetImpl. 183189251Ssam 184189251Ssam/// GetNextPtr - In order to save space, each bucket is a 185189251Ssam/// singly-linked-list. In order to make deletion more efficient, we make 186189251Ssam/// the list circular, so we can delete a node without computing its hash. 187189251Ssam/// The problem with this is that the start of the hash buckets are not 188189251Ssam/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 189189251Ssam/// use GetBucketPtr when this happens. 190189251Ssamstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 191189251Ssam // The low bit is set if this is the pointer back to the bucket. 192189251Ssam if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 193189251Ssam return 0; 194189251Ssam 195189251Ssam return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 196189251Ssam} 197189251Ssam 198189251Ssam 199189251Ssam/// testing. 200189251Ssamstatic void **GetBucketPtr(void *NextInBucketPtr) { 201189251Ssam intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 202189251Ssam assert((Ptr & 1) && "Not a bucket pointer"); 203189251Ssam return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 204189251Ssam} 205189251Ssam 206189251Ssam/// GetBucketFor - Hash the specified node ID and return the hash bucket for 207189251Ssam/// the specified ID. 208189251Ssamstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 209189251Ssam // NumBuckets is always a power of 2. 210189251Ssam unsigned BucketNum = Hash & (NumBuckets-1); 211189251Ssam return Buckets + BucketNum; 212189251Ssam} 213189251Ssam 214189251Ssam/// AllocateBuckets - Allocated initialized bucket memory. 215189251Ssamstatic void **AllocateBuckets(unsigned NumBuckets) { 216189251Ssam void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); 217189251Ssam // Set the very last bucket to be a non-null "pointer". 218189251Ssam Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 219189251Ssam return Buckets; 220189251Ssam} 221189251Ssam 222189251Ssam//===----------------------------------------------------------------------===// 223189251Ssam// FoldingSetImpl Implementation 224189251Ssam 225189251SsamFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 226189251Ssam assert(5 < Log2InitSize && Log2InitSize < 32 && 227189251Ssam "Initial hash table size out of range"); 228189251Ssam NumBuckets = 1 << Log2InitSize; 229189251Ssam Buckets = AllocateBuckets(NumBuckets); 230189251Ssam NumNodes = 0; 231189251Ssam} 232189251SsamFoldingSetImpl::~FoldingSetImpl() { 233189251Ssam free(Buckets); 234189251Ssam} 235189251Ssamvoid FoldingSetImpl::clear() { 236189251Ssam // Set all but the last bucket to null pointers. 237189251Ssam memset(Buckets, 0, NumBuckets*sizeof(void*)); 238189251Ssam 239189251Ssam // Set the very last bucket to be a non-null "pointer". 240189251Ssam Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 241189251Ssam 242189251Ssam // Reset the node count to zero. 243189251Ssam NumNodes = 0; 244189251Ssam} 245189251Ssam 246189251Ssam/// GrowHashTable - Double the size of the hash table and rehash everything. 247189251Ssam/// 248189251Ssamvoid FoldingSetImpl::GrowHashTable() { 249189251Ssam void **OldBuckets = Buckets; 250189251Ssam unsigned OldNumBuckets = NumBuckets; 251189251Ssam NumBuckets <<= 1; 252189251Ssam 253189251Ssam // Clear out new buckets. 254189251Ssam Buckets = AllocateBuckets(NumBuckets); 255189251Ssam NumNodes = 0; 256189251Ssam 257189251Ssam // Walk the old buckets, rehashing nodes into their new place. 258189251Ssam FoldingSetNodeID TempID; 259189251Ssam for (unsigned i = 0; i != OldNumBuckets; ++i) { 260189251Ssam void *Probe = OldBuckets[i]; 261189251Ssam if (!Probe) continue; 262189251Ssam while (Node *NodeInBucket = GetNextPtr(Probe)) { 263189251Ssam // Figure out the next link, remove NodeInBucket from the old link. 264189251Ssam Probe = NodeInBucket->getNextInBucket(); 265189251Ssam NodeInBucket->SetNextInBucket(0); 266189251Ssam 267189251Ssam // Insert the node into the new bucket, after recomputing the hash. 268189251Ssam InsertNode(NodeInBucket, 269189251Ssam GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), 270189251Ssam Buckets, NumBuckets)); 271189251Ssam TempID.clear(); 272189251Ssam } 273189251Ssam } 274189251Ssam 275189251Ssam free(OldBuckets); 276189251Ssam} 277189251Ssam 278189251Ssam/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 279189251Ssam/// return it. If not, return the insertion token that will make insertion 280189251Ssam/// faster. 281189251SsamFoldingSetImpl::Node 282189251Ssam*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 283189251Ssam void *&InsertPos) { 284214734Srpaulo 285214734Srpaulo void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); 286189251Ssam void *Probe = *Bucket; 287189251Ssam 288189251Ssam InsertPos = 0; 289189251Ssam 290189251Ssam FoldingSetNodeID TempID; 291189251Ssam while (Node *NodeInBucket = GetNextPtr(Probe)) { 292189251Ssam if (NodeEquals(NodeInBucket, ID, TempID)) 293189251Ssam return NodeInBucket; 294189251Ssam TempID.clear(); 295189251Ssam 296189251Ssam Probe = NodeInBucket->getNextInBucket(); 297189251Ssam } 298189251Ssam 299189251Ssam // Didn't find the node, return null with the bucket as the InsertPos. 300189251Ssam InsertPos = Bucket; 301189251Ssam return 0; 302189251Ssam} 303189251Ssam 304189251Ssam/// InsertNode - Insert the specified node into the folding set, knowing that it 305189251Ssam/// is not already in the map. InsertPos must be obtained from 306189251Ssam/// FindNodeOrInsertPos. 307189251Ssamvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 308189251Ssam assert(N->getNextInBucket() == 0); 309189251Ssam // Do we need to grow the hashtable? 310189251Ssam if (NumNodes+1 > NumBuckets*2) { 311189251Ssam GrowHashTable(); 312189251Ssam FoldingSetNodeID TempID; 313189251Ssam InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); 314189251Ssam } 315189251Ssam 316189251Ssam ++NumNodes; 317189251Ssam 318189251Ssam /// The insert position is actually a bucket pointer. 319189251Ssam void **Bucket = static_cast<void**>(InsertPos); 320189251Ssam 321189251Ssam void *Next = *Bucket; 322189251Ssam 323189251Ssam // If this is the first insertion into this bucket, its next pointer will be 324189251Ssam // null. Pretend as if it pointed to itself, setting the low bit to indicate 325189251Ssam // that it is a pointer to the bucket. 326189251Ssam if (Next == 0) 327189251Ssam Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 328189251Ssam 329189251Ssam // Set the node's next pointer, and make the bucket point to the node. 330189251Ssam N->SetNextInBucket(Next); 331189251Ssam *Bucket = N; 332189251Ssam} 333189251Ssam 334189251Ssam/// RemoveNode - Remove a node from the folding set, returning true if one was 335189251Ssam/// removed or false if the node was not in the folding set. 336189251Ssambool FoldingSetImpl::RemoveNode(Node *N) { 337189251Ssam // Because each bucket is a circular list, we don't need to compute N's hash 338189251Ssam // to remove it. 339189251Ssam void *Ptr = N->getNextInBucket(); 340189251Ssam if (Ptr == 0) return false; // Not in folding set. 341189251Ssam 342189251Ssam --NumNodes; 343189251Ssam N->SetNextInBucket(0); 344189251Ssam 345189251Ssam // Remember what N originally pointed to, either a bucket or another node. 346189251Ssam void *NodeNextPtr = Ptr; 347189251Ssam 348189251Ssam // Chase around the list until we find the node (or bucket) which points to N. 349189251Ssam while (true) { 350189251Ssam if (Node *NodeInBucket = GetNextPtr(Ptr)) { 351189251Ssam // Advance pointer. 352189251Ssam Ptr = NodeInBucket->getNextInBucket(); 353189251Ssam 354189251Ssam // We found a node that points to N, change it to point to N's next node, 355189251Ssam // removing N from the list. 356189251Ssam if (Ptr == N) { 357189251Ssam NodeInBucket->SetNextInBucket(NodeNextPtr); 358189251Ssam return true; 359189251Ssam } 360189251Ssam } else { 361189251Ssam void **Bucket = GetBucketPtr(Ptr); 362189251Ssam Ptr = *Bucket; 363189251Ssam 364189251Ssam // If we found that the bucket points to N, update the bucket to point to 365189251Ssam // whatever is next. 366189251Ssam if (Ptr == N) { 367189251Ssam *Bucket = NodeNextPtr; 368189251Ssam return true; 369189251Ssam } 370189251Ssam } 371189251Ssam } 372189251Ssam} 373189251Ssam 374189251Ssam/// GetOrInsertNode - If there is an existing simple Node exactly 375189251Ssam/// equal to the specified node, return it. Otherwise, insert 'N' and it 376189251Ssam/// instead. 377189251SsamFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 378189251Ssam FoldingSetNodeID ID; 379189251Ssam GetNodeProfile(N, ID); 380189251Ssam void *IP; 381189251Ssam if (Node *E = FindNodeOrInsertPos(ID, IP)) 382189251Ssam return E; 383189251Ssam InsertNode(N, IP); 384189251Ssam return N; 385189251Ssam} 386189251Ssam 387189251Ssam//===----------------------------------------------------------------------===// 388189251Ssam// FoldingSetIteratorImpl Implementation 389189251Ssam 390189251SsamFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 391189251Ssam // Skip to the first non-null non-self-cycle bucket. 392189251Ssam while (*Bucket != reinterpret_cast<void*>(-1) && 393189251Ssam (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) 394189251Ssam ++Bucket; 395189251Ssam 396189251Ssam NodePtr = static_cast<FoldingSetNode*>(*Bucket); 397189251Ssam} 398189251Ssam 399189251Ssamvoid FoldingSetIteratorImpl::advance() { 400189251Ssam // If there is another link within this bucket, go to it. 401189251Ssam void *Probe = NodePtr->getNextInBucket(); 402189251Ssam 403189251Ssam if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 404189251Ssam NodePtr = NextNodeInBucket; 405189251Ssam else { 406189251Ssam // Otherwise, this is the last link in this bucket. 407189251Ssam void **Bucket = GetBucketPtr(Probe); 408189251Ssam 409189251Ssam // Skip to the next non-null non-self-cycle bucket. 410189251Ssam do { 411189251Ssam ++Bucket; 412189251Ssam } while (*Bucket != reinterpret_cast<void*>(-1) && 413189251Ssam (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); 414189251Ssam 415189251Ssam NodePtr = static_cast<FoldingSetNode*>(*Bucket); 416189251Ssam } 417189251Ssam} 418189251Ssam 419189251Ssam//===----------------------------------------------------------------------===// 420189251Ssam// FoldingSetBucketIteratorImpl Implementation 421189251Ssam 422189251SsamFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 423189251Ssam Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; 424189251Ssam} 425189251Ssam