1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements a hash set that can be used to remove duplication of 11252723Sdim// nodes in a graph. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#include "llvm/ADT/FoldingSet.h" 16235633Sdim#include "llvm/ADT/Hashing.h" 17205407Srdivacky#include "llvm/Support/Allocator.h" 18198090Srdivacky#include "llvm/Support/ErrorHandling.h" 19252723Sdim#include "llvm/Support/Host.h" 20193323Sed#include "llvm/Support/MathExtras.h" 21193323Sed#include <cassert> 22193323Sed#include <cstring> 23193323Sedusing namespace llvm; 24193323Sed 25193323Sed//===----------------------------------------------------------------------===// 26212904Sdim// FoldingSetNodeIDRef Implementation 27212904Sdim 28212904Sdim/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, 29212904Sdim/// used to lookup the node in the FoldingSetImpl. 30212904Sdimunsigned FoldingSetNodeIDRef::ComputeHash() const { 31235633Sdim return static_cast<unsigned>(hash_combine_range(Data, Data+Size)); 32212904Sdim} 33212904Sdim 34212904Sdimbool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { 35212904Sdim if (Size != RHS.Size) return false; 36212904Sdim return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; 37212904Sdim} 38212904Sdim 39245431Sdim/// Used to compare the "ordering" of two nodes as defined by the 40245431Sdim/// profiled bits and their ordering defined by memcmp(). 41245431Sdimbool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { 42245431Sdim if (Size != RHS.Size) 43245431Sdim return Size < RHS.Size; 44245431Sdim return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0; 45245431Sdim} 46245431Sdim 47212904Sdim//===----------------------------------------------------------------------===// 48193323Sed// FoldingSetNodeID Implementation 49193323Sed 50193323Sed/// Add* - Add various data types to Bit data. 51193323Sed/// 52193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) { 53193323Sed // Note: this adds pointers to the hash using sizes and endianness that 54193323Sed // depend on the host. It doesn't matter however, because hashing on 55193323Sed // pointer values in inherently unstable. Nothing should depend on the 56193323Sed // ordering of nodes in the folding set. 57226890Sdim Bits.append(reinterpret_cast<unsigned *>(&Ptr), 58226890Sdim reinterpret_cast<unsigned *>(&Ptr+1)); 59193323Sed} 60193323Sedvoid FoldingSetNodeID::AddInteger(signed I) { 61193323Sed Bits.push_back(I); 62193323Sed} 63193323Sedvoid FoldingSetNodeID::AddInteger(unsigned I) { 64193323Sed Bits.push_back(I); 65193323Sed} 66193323Sedvoid FoldingSetNodeID::AddInteger(long I) { 67193323Sed AddInteger((unsigned long)I); 68193323Sed} 69193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) { 70193323Sed if (sizeof(long) == sizeof(int)) 71193323Sed AddInteger(unsigned(I)); 72193323Sed else if (sizeof(long) == sizeof(long long)) { 73193323Sed AddInteger((unsigned long long)I); 74193323Sed } else { 75198090Srdivacky llvm_unreachable("unexpected sizeof(long)"); 76193323Sed } 77193323Sed} 78193323Sedvoid FoldingSetNodeID::AddInteger(long long I) { 79193323Sed AddInteger((unsigned long long)I); 80193323Sed} 81193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) { 82193323Sed AddInteger(unsigned(I)); 83223017Sdim if ((uint64_t)(unsigned)I != I) 84193323Sed Bits.push_back(unsigned(I >> 32)); 85193323Sed} 86193323Sed 87198090Srdivackyvoid FoldingSetNodeID::AddString(StringRef String) { 88198090Srdivacky unsigned Size = String.size(); 89193323Sed Bits.push_back(Size); 90193323Sed if (!Size) return; 91193323Sed 92193323Sed unsigned Units = Size / 4; 93193323Sed unsigned Pos = 0; 94198090Srdivacky const unsigned *Base = (const unsigned*) String.data(); 95193323Sed 96193323Sed // If the string is aligned do a bulk transfer. 97193323Sed if (!((intptr_t)Base & 3)) { 98193323Sed Bits.append(Base, Base + Units); 99193323Sed Pos = (Units + 1) * 4; 100193323Sed } else { 101193323Sed // Otherwise do it the hard way. 102218893Sdim // To be compatible with above bulk transfer, we need to take endianness 103218893Sdim // into account. 104252723Sdim if (sys::IsBigEndianHost) { 105218893Sdim for (Pos += 4; Pos <= Size; Pos += 4) { 106218893Sdim unsigned V = ((unsigned char)String[Pos - 4] << 24) | 107218893Sdim ((unsigned char)String[Pos - 3] << 16) | 108218893Sdim ((unsigned char)String[Pos - 2] << 8) | 109218893Sdim (unsigned char)String[Pos - 1]; 110218893Sdim Bits.push_back(V); 111218893Sdim } 112218893Sdim } else { 113252723Sdim assert(sys::IsLittleEndianHost && "Unexpected host endianness"); 114218893Sdim for (Pos += 4; Pos <= Size; Pos += 4) { 115218893Sdim unsigned V = ((unsigned char)String[Pos - 1] << 24) | 116218893Sdim ((unsigned char)String[Pos - 2] << 16) | 117218893Sdim ((unsigned char)String[Pos - 3] << 8) | 118218893Sdim (unsigned char)String[Pos - 4]; 119218893Sdim Bits.push_back(V); 120218893Sdim } 121193323Sed } 122193323Sed } 123193323Sed 124193323Sed // With the leftover bits. 125193323Sed unsigned V = 0; 126218893Sdim // Pos will have overshot size by 4 - #bytes left over. 127218893Sdim // No need to take endianness into account here - this is always executed. 128193323Sed switch (Pos - Size) { 129193323Sed case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. 130193323Sed case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. 131193323Sed case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; 132193323Sed default: return; // Nothing left. 133193323Sed } 134193323Sed 135193323Sed Bits.push_back(V); 136193323Sed} 137193323Sed 138221345Sdim// AddNodeID - Adds the Bit data of another ID to *this. 139221345Sdimvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { 140221345Sdim Bits.append(ID.Bits.begin(), ID.Bits.end()); 141221345Sdim} 142221345Sdim 143193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to 144193323Sed/// lookup the node in the FoldingSetImpl. 145193323Sedunsigned FoldingSetNodeID::ComputeHash() const { 146212904Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); 147193323Sed} 148193323Sed 149193323Sed/// operator== - Used to compare two nodes to each other. 150193323Sed/// 151252723Sdimbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const { 152212904Sdim return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 153193323Sed} 154193323Sed 155212904Sdim/// operator== - Used to compare two nodes to each other. 156212904Sdim/// 157212904Sdimbool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { 158212904Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; 159212904Sdim} 160212904Sdim 161245431Sdim/// Used to compare the "ordering" of two nodes as defined by the 162245431Sdim/// profiled bits and their ordering defined by memcmp(). 163252723Sdimbool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const { 164245431Sdim return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); 165245431Sdim} 166245431Sdim 167245431Sdimbool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const { 168245431Sdim return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS; 169245431Sdim} 170245431Sdim 171205407Srdivacky/// Intern - Copy this node's data to a memory region allocated from the 172205407Srdivacky/// given allocator and return a FoldingSetNodeIDRef describing the 173205407Srdivacky/// interned data. 174205407SrdivackyFoldingSetNodeIDRef 175205407SrdivackyFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { 176205407Srdivacky unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); 177205407Srdivacky std::uninitialized_copy(Bits.begin(), Bits.end(), New); 178205407Srdivacky return FoldingSetNodeIDRef(New, Bits.size()); 179205407Srdivacky} 180193323Sed 181193323Sed//===----------------------------------------------------------------------===// 182193323Sed/// Helper functions for FoldingSetImpl. 183193323Sed 184193323Sed/// GetNextPtr - In order to save space, each bucket is a 185193323Sed/// singly-linked-list. In order to make deletion more efficient, we make 186193323Sed/// the list circular, so we can delete a node without computing its hash. 187193323Sed/// The problem with this is that the start of the hash buckets are not 188193323Sed/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: 189193323Sed/// use GetBucketPtr when this happens. 190193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { 191193323Sed // The low bit is set if this is the pointer back to the bucket. 192193323Sed if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) 193193323Sed return 0; 194193323Sed 195193323Sed return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); 196193323Sed} 197193323Sed 198193323Sed 199193323Sed/// testing. 200193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) { 201193323Sed intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr); 202193323Sed assert((Ptr & 1) && "Not a bucket pointer"); 203193323Sed return reinterpret_cast<void**>(Ptr & ~intptr_t(1)); 204193323Sed} 205193323Sed 206193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for 207193323Sed/// the specified ID. 208212904Sdimstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { 209193323Sed // NumBuckets is always a power of 2. 210212904Sdim unsigned BucketNum = Hash & (NumBuckets-1); 211193323Sed return Buckets + BucketNum; 212193323Sed} 213193323Sed 214210299Sed/// AllocateBuckets - Allocated initialized bucket memory. 215210299Sedstatic void **AllocateBuckets(unsigned NumBuckets) { 216210299Sed void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); 217210299Sed // Set the very last bucket to be a non-null "pointer". 218210299Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 219210299Sed return Buckets; 220210299Sed} 221210299Sed 222193323Sed//===----------------------------------------------------------------------===// 223193323Sed// FoldingSetImpl Implementation 224193323Sed 225193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { 226193323Sed assert(5 < Log2InitSize && Log2InitSize < 32 && 227193323Sed "Initial hash table size out of range"); 228193323Sed NumBuckets = 1 << Log2InitSize; 229210299Sed Buckets = AllocateBuckets(NumBuckets); 230210299Sed NumNodes = 0; 231193323Sed} 232193323SedFoldingSetImpl::~FoldingSetImpl() { 233210299Sed free(Buckets); 234193323Sed} 235193323Sedvoid FoldingSetImpl::clear() { 236193323Sed // Set all but the last bucket to null pointers. 237193323Sed memset(Buckets, 0, NumBuckets*sizeof(void*)); 238193323Sed 239193323Sed // Set the very last bucket to be a non-null "pointer". 240193323Sed Buckets[NumBuckets] = reinterpret_cast<void*>(-1); 241193323Sed 242193323Sed // Reset the node count to zero. 243193323Sed NumNodes = 0; 244193323Sed} 245193323Sed 246193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything. 247193323Sed/// 248193323Sedvoid FoldingSetImpl::GrowHashTable() { 249193323Sed void **OldBuckets = Buckets; 250193323Sed unsigned OldNumBuckets = NumBuckets; 251193323Sed NumBuckets <<= 1; 252193323Sed 253193323Sed // Clear out new buckets. 254210299Sed Buckets = AllocateBuckets(NumBuckets); 255210299Sed NumNodes = 0; 256193323Sed 257193323Sed // Walk the old buckets, rehashing nodes into their new place. 258212904Sdim FoldingSetNodeID TempID; 259193323Sed for (unsigned i = 0; i != OldNumBuckets; ++i) { 260193323Sed void *Probe = OldBuckets[i]; 261193323Sed if (!Probe) continue; 262193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 263193323Sed // Figure out the next link, remove NodeInBucket from the old link. 264193323Sed Probe = NodeInBucket->getNextInBucket(); 265193323Sed NodeInBucket->SetNextInBucket(0); 266193323Sed 267193323Sed // Insert the node into the new bucket, after recomputing the hash. 268212904Sdim InsertNode(NodeInBucket, 269212904Sdim GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), 270212904Sdim Buckets, NumBuckets)); 271212904Sdim TempID.clear(); 272193323Sed } 273193323Sed } 274193323Sed 275210299Sed free(OldBuckets); 276193323Sed} 277193323Sed 278193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, 279193323Sed/// return it. If not, return the insertion token that will make insertion 280193323Sed/// faster. 281193323SedFoldingSetImpl::Node 282193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, 283193323Sed void *&InsertPos) { 284235633Sdim unsigned IDHash = ID.ComputeHash(); 285235633Sdim void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); 286193323Sed void *Probe = *Bucket; 287193323Sed 288193323Sed InsertPos = 0; 289193323Sed 290212904Sdim FoldingSetNodeID TempID; 291193323Sed while (Node *NodeInBucket = GetNextPtr(Probe)) { 292235633Sdim if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) 293193323Sed return NodeInBucket; 294212904Sdim TempID.clear(); 295193323Sed 296193323Sed Probe = NodeInBucket->getNextInBucket(); 297193323Sed } 298193323Sed 299193323Sed // Didn't find the node, return null with the bucket as the InsertPos. 300193323Sed InsertPos = Bucket; 301193323Sed return 0; 302193323Sed} 303193323Sed 304193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it 305193323Sed/// is not already in the map. InsertPos must be obtained from 306193323Sed/// FindNodeOrInsertPos. 307193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { 308193323Sed assert(N->getNextInBucket() == 0); 309193323Sed // Do we need to grow the hashtable? 310193323Sed if (NumNodes+1 > NumBuckets*2) { 311193323Sed GrowHashTable(); 312212904Sdim FoldingSetNodeID TempID; 313212904Sdim InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); 314193323Sed } 315193323Sed 316193323Sed ++NumNodes; 317193323Sed 318193323Sed /// The insert position is actually a bucket pointer. 319193323Sed void **Bucket = static_cast<void**>(InsertPos); 320193323Sed 321193323Sed void *Next = *Bucket; 322193323Sed 323193323Sed // If this is the first insertion into this bucket, its next pointer will be 324193323Sed // null. Pretend as if it pointed to itself, setting the low bit to indicate 325193323Sed // that it is a pointer to the bucket. 326193323Sed if (Next == 0) 327193323Sed Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); 328193323Sed 329193323Sed // Set the node's next pointer, and make the bucket point to the node. 330193323Sed N->SetNextInBucket(Next); 331193323Sed *Bucket = N; 332193323Sed} 333193323Sed 334193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was 335193323Sed/// removed or false if the node was not in the folding set. 336193323Sedbool FoldingSetImpl::RemoveNode(Node *N) { 337193323Sed // Because each bucket is a circular list, we don't need to compute N's hash 338193323Sed // to remove it. 339193323Sed void *Ptr = N->getNextInBucket(); 340193323Sed if (Ptr == 0) return false; // Not in folding set. 341193323Sed 342193323Sed --NumNodes; 343193323Sed N->SetNextInBucket(0); 344193323Sed 345193323Sed // Remember what N originally pointed to, either a bucket or another node. 346193323Sed void *NodeNextPtr = Ptr; 347193323Sed 348193323Sed // Chase around the list until we find the node (or bucket) which points to N. 349193323Sed while (true) { 350193323Sed if (Node *NodeInBucket = GetNextPtr(Ptr)) { 351193323Sed // Advance pointer. 352193323Sed Ptr = NodeInBucket->getNextInBucket(); 353193323Sed 354193323Sed // We found a node that points to N, change it to point to N's next node, 355193323Sed // removing N from the list. 356193323Sed if (Ptr == N) { 357193323Sed NodeInBucket->SetNextInBucket(NodeNextPtr); 358193323Sed return true; 359193323Sed } 360193323Sed } else { 361193323Sed void **Bucket = GetBucketPtr(Ptr); 362193323Sed Ptr = *Bucket; 363193323Sed 364193323Sed // If we found that the bucket points to N, update the bucket to point to 365193323Sed // whatever is next. 366193323Sed if (Ptr == N) { 367193323Sed *Bucket = NodeNextPtr; 368193323Sed return true; 369193323Sed } 370193323Sed } 371193323Sed } 372193323Sed} 373193323Sed 374193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly 375193323Sed/// equal to the specified node, return it. Otherwise, insert 'N' and it 376193323Sed/// instead. 377193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { 378193323Sed FoldingSetNodeID ID; 379212904Sdim GetNodeProfile(N, ID); 380193323Sed void *IP; 381193323Sed if (Node *E = FindNodeOrInsertPos(ID, IP)) 382193323Sed return E; 383193323Sed InsertNode(N, IP); 384193323Sed return N; 385193323Sed} 386193323Sed 387193323Sed//===----------------------------------------------------------------------===// 388193323Sed// FoldingSetIteratorImpl Implementation 389193323Sed 390193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { 391193323Sed // Skip to the first non-null non-self-cycle bucket. 392193323Sed while (*Bucket != reinterpret_cast<void*>(-1) && 393193323Sed (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) 394193323Sed ++Bucket; 395193323Sed 396193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 397193323Sed} 398193323Sed 399193323Sedvoid FoldingSetIteratorImpl::advance() { 400193323Sed // If there is another link within this bucket, go to it. 401193323Sed void *Probe = NodePtr->getNextInBucket(); 402193323Sed 403193323Sed if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe)) 404193323Sed NodePtr = NextNodeInBucket; 405193323Sed else { 406193323Sed // Otherwise, this is the last link in this bucket. 407193323Sed void **Bucket = GetBucketPtr(Probe); 408193323Sed 409193323Sed // Skip to the next non-null non-self-cycle bucket. 410193323Sed do { 411193323Sed ++Bucket; 412193323Sed } while (*Bucket != reinterpret_cast<void*>(-1) && 413193323Sed (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); 414193323Sed 415193323Sed NodePtr = static_cast<FoldingSetNode*>(*Bucket); 416193323Sed } 417193323Sed} 418193323Sed 419193323Sed//===----------------------------------------------------------------------===// 420193323Sed// FoldingSetBucketIteratorImpl Implementation 421193323Sed 422193323SedFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { 423193323Sed Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; 424193323Sed} 425