1193323Sed//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements a hash set that can be used to remove duplication of
11249423Sdim// nodes in a graph.
12193323Sed//
13193323Sed//===----------------------------------------------------------------------===//
14193323Sed
15193323Sed#include "llvm/ADT/FoldingSet.h"
16234353Sdim#include "llvm/ADT/Hashing.h"
17205407Srdivacky#include "llvm/Support/Allocator.h"
18198090Srdivacky#include "llvm/Support/ErrorHandling.h"
19249423Sdim#include "llvm/Support/Host.h"
20193323Sed#include "llvm/Support/MathExtras.h"
21193323Sed#include <cassert>
22193323Sed#include <cstring>
23193323Sedusing namespace llvm;
24193323Sed
25193323Sed//===----------------------------------------------------------------------===//
26212904Sdim// FoldingSetNodeIDRef Implementation
27212904Sdim
28212904Sdim/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
29212904Sdim/// used to lookup the node in the FoldingSetImpl.
30212904Sdimunsigned FoldingSetNodeIDRef::ComputeHash() const {
31234353Sdim  return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
32212904Sdim}
33212904Sdim
34212904Sdimbool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
35212904Sdim  if (Size != RHS.Size) return false;
36212904Sdim  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
37212904Sdim}
38212904Sdim
39243830Sdim/// Used to compare the "ordering" of two nodes as defined by the
40243830Sdim/// profiled bits and their ordering defined by memcmp().
41243830Sdimbool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const {
42243830Sdim  if (Size != RHS.Size)
43243830Sdim    return Size < RHS.Size;
44243830Sdim  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
45243830Sdim}
46243830Sdim
47212904Sdim//===----------------------------------------------------------------------===//
48193323Sed// FoldingSetNodeID Implementation
49193323Sed
50193323Sed/// Add* - Add various data types to Bit data.
51193323Sed///
52193323Sedvoid FoldingSetNodeID::AddPointer(const void *Ptr) {
53193323Sed  // Note: this adds pointers to the hash using sizes and endianness that
54288943Sdim  // depend on the host. It doesn't matter, however, because hashing on
55288943Sdim  // pointer values is inherently unstable. Nothing should depend on the
56193323Sed  // ordering of nodes in the folding set.
57226633Sdim  Bits.append(reinterpret_cast<unsigned *>(&Ptr),
58226633Sdim              reinterpret_cast<unsigned *>(&Ptr+1));
59193323Sed}
60193323Sedvoid FoldingSetNodeID::AddInteger(signed I) {
61193323Sed  Bits.push_back(I);
62193323Sed}
63193323Sedvoid FoldingSetNodeID::AddInteger(unsigned I) {
64193323Sed  Bits.push_back(I);
65193323Sed}
66193323Sedvoid FoldingSetNodeID::AddInteger(long I) {
67193323Sed  AddInteger((unsigned long)I);
68193323Sed}
69193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long I) {
70193323Sed  if (sizeof(long) == sizeof(int))
71193323Sed    AddInteger(unsigned(I));
72193323Sed  else if (sizeof(long) == sizeof(long long)) {
73193323Sed    AddInteger((unsigned long long)I);
74193323Sed  } else {
75198090Srdivacky    llvm_unreachable("unexpected sizeof(long)");
76193323Sed  }
77193323Sed}
78193323Sedvoid FoldingSetNodeID::AddInteger(long long I) {
79193323Sed  AddInteger((unsigned long long)I);
80193323Sed}
81193323Sedvoid FoldingSetNodeID::AddInteger(unsigned long long I) {
82193323Sed  AddInteger(unsigned(I));
83223017Sdim  if ((uint64_t)(unsigned)I != I)
84193323Sed    Bits.push_back(unsigned(I >> 32));
85193323Sed}
86193323Sed
87198090Srdivackyvoid FoldingSetNodeID::AddString(StringRef String) {
88198090Srdivacky  unsigned Size =  String.size();
89193323Sed  Bits.push_back(Size);
90193323Sed  if (!Size) return;
91193323Sed
92193323Sed  unsigned Units = Size / 4;
93193323Sed  unsigned Pos = 0;
94198090Srdivacky  const unsigned *Base = (const unsigned*) String.data();
95193323Sed
96193323Sed  // If the string is aligned do a bulk transfer.
97193323Sed  if (!((intptr_t)Base & 3)) {
98193323Sed    Bits.append(Base, Base + Units);
99193323Sed    Pos = (Units + 1) * 4;
100193323Sed  } else {
101193323Sed    // Otherwise do it the hard way.
102218893Sdim    // To be compatible with above bulk transfer, we need to take endianness
103218893Sdim    // into account.
104288943Sdim    static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
105288943Sdim                  "Unexpected host endianness");
106251662Sdim    if (sys::IsBigEndianHost) {
107218893Sdim      for (Pos += 4; Pos <= Size; Pos += 4) {
108218893Sdim        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
109218893Sdim                     ((unsigned char)String[Pos - 3] << 16) |
110218893Sdim                     ((unsigned char)String[Pos - 2] << 8) |
111218893Sdim                      (unsigned char)String[Pos - 1];
112218893Sdim        Bits.push_back(V);
113218893Sdim      }
114288943Sdim    } else {  // Little-endian host
115218893Sdim      for (Pos += 4; Pos <= Size; Pos += 4) {
116218893Sdim        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
117218893Sdim                     ((unsigned char)String[Pos - 2] << 16) |
118218893Sdim                     ((unsigned char)String[Pos - 3] << 8) |
119218893Sdim                      (unsigned char)String[Pos - 4];
120218893Sdim        Bits.push_back(V);
121218893Sdim      }
122193323Sed    }
123193323Sed  }
124193323Sed
125193323Sed  // With the leftover bits.
126193323Sed  unsigned V = 0;
127218893Sdim  // Pos will have overshot size by 4 - #bytes left over.
128218893Sdim  // No need to take endianness into account here - this is always executed.
129193323Sed  switch (Pos - Size) {
130193323Sed  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
131193323Sed  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
132193323Sed  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
133193323Sed  default: return; // Nothing left.
134193323Sed  }
135193323Sed
136193323Sed  Bits.push_back(V);
137193323Sed}
138193323Sed
139221345Sdim// AddNodeID - Adds the Bit data of another ID to *this.
140221345Sdimvoid FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) {
141221345Sdim  Bits.append(ID.Bits.begin(), ID.Bits.end());
142221345Sdim}
143221345Sdim
144193323Sed/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
145193323Sed/// lookup the node in the FoldingSetImpl.
146193323Sedunsigned FoldingSetNodeID::ComputeHash() const {
147212904Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
148193323Sed}
149193323Sed
150193323Sed/// operator== - Used to compare two nodes to each other.
151193323Sed///
152249423Sdimbool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
153212904Sdim  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
154193323Sed}
155193323Sed
156212904Sdim/// operator== - Used to compare two nodes to each other.
157212904Sdim///
158212904Sdimbool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
159212904Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
160212904Sdim}
161212904Sdim
162243830Sdim/// Used to compare the "ordering" of two nodes as defined by the
163243830Sdim/// profiled bits and their ordering defined by memcmp().
164249423Sdimbool FoldingSetNodeID::operator<(const FoldingSetNodeID &RHS) const {
165243830Sdim  return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
166243830Sdim}
167243830Sdim
168243830Sdimbool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
169243830Sdim  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
170243830Sdim}
171243830Sdim
172205407Srdivacky/// Intern - Copy this node's data to a memory region allocated from the
173205407Srdivacky/// given allocator and return a FoldingSetNodeIDRef describing the
174205407Srdivacky/// interned data.
175205407SrdivackyFoldingSetNodeIDRef
176205407SrdivackyFoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
177205407Srdivacky  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
178205407Srdivacky  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
179205407Srdivacky  return FoldingSetNodeIDRef(New, Bits.size());
180205407Srdivacky}
181193323Sed
182193323Sed//===----------------------------------------------------------------------===//
183193323Sed/// Helper functions for FoldingSetImpl.
184193323Sed
185193323Sed/// GetNextPtr - In order to save space, each bucket is a
186193323Sed/// singly-linked-list. In order to make deletion more efficient, we make
187193323Sed/// the list circular, so we can delete a node without computing its hash.
188193323Sed/// The problem with this is that the start of the hash buckets are not
189193323Sed/// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null:
190193323Sed/// use GetBucketPtr when this happens.
191193323Sedstatic FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
192193323Sed  // The low bit is set if this is the pointer back to the bucket.
193193323Sed  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
194276479Sdim    return nullptr;
195193323Sed
196193323Sed  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
197193323Sed}
198193323Sed
199193323Sed
200193323Sed/// testing.
201193323Sedstatic void **GetBucketPtr(void *NextInBucketPtr) {
202193323Sed  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
203193323Sed  assert((Ptr & 1) && "Not a bucket pointer");
204193323Sed  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
205193323Sed}
206193323Sed
207193323Sed/// GetBucketFor - Hash the specified node ID and return the hash bucket for
208193323Sed/// the specified ID.
209212904Sdimstatic void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
210193323Sed  // NumBuckets is always a power of 2.
211212904Sdim  unsigned BucketNum = Hash & (NumBuckets-1);
212193323Sed  return Buckets + BucketNum;
213193323Sed}
214193323Sed
215210299Sed/// AllocateBuckets - Allocated initialized bucket memory.
216210299Sedstatic void **AllocateBuckets(unsigned NumBuckets) {
217210299Sed  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
218210299Sed  // Set the very last bucket to be a non-null "pointer".
219210299Sed  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
220210299Sed  return Buckets;
221210299Sed}
222210299Sed
223193323Sed//===----------------------------------------------------------------------===//
224193323Sed// FoldingSetImpl Implementation
225193323Sed
226288943Sdimvoid FoldingSetImpl::anchor() {}
227288943Sdim
228193323SedFoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
229193323Sed  assert(5 < Log2InitSize && Log2InitSize < 32 &&
230193323Sed         "Initial hash table size out of range");
231193323Sed  NumBuckets = 1 << Log2InitSize;
232210299Sed  Buckets = AllocateBuckets(NumBuckets);
233210299Sed  NumNodes = 0;
234193323Sed}
235296417Sdim
236296417SdimFoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg)
237296417Sdim    : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) {
238296417Sdim  Arg.Buckets = nullptr;
239296417Sdim  Arg.NumBuckets = 0;
240296417Sdim  Arg.NumNodes = 0;
241296417Sdim}
242296417Sdim
243296417SdimFoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) {
244296417Sdim  free(Buckets); // This may be null if the set is in a moved-from state.
245296417Sdim  Buckets = RHS.Buckets;
246296417Sdim  NumBuckets = RHS.NumBuckets;
247296417Sdim  NumNodes = RHS.NumNodes;
248296417Sdim  RHS.Buckets = nullptr;
249296417Sdim  RHS.NumBuckets = 0;
250296417Sdim  RHS.NumNodes = 0;
251296417Sdim  return *this;
252296417Sdim}
253296417Sdim
254193323SedFoldingSetImpl::~FoldingSetImpl() {
255210299Sed  free(Buckets);
256193323Sed}
257296417Sdim
258193323Sedvoid FoldingSetImpl::clear() {
259193323Sed  // Set all but the last bucket to null pointers.
260193323Sed  memset(Buckets, 0, NumBuckets*sizeof(void*));
261193323Sed
262193323Sed  // Set the very last bucket to be a non-null "pointer".
263193323Sed  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
264193323Sed
265193323Sed  // Reset the node count to zero.
266193323Sed  NumNodes = 0;
267193323Sed}
268193323Sed
269193323Sed/// GrowHashTable - Double the size of the hash table and rehash everything.
270193323Sed///
271193323Sedvoid FoldingSetImpl::GrowHashTable() {
272193323Sed  void **OldBuckets = Buckets;
273193323Sed  unsigned OldNumBuckets = NumBuckets;
274193323Sed  NumBuckets <<= 1;
275193323Sed
276193323Sed  // Clear out new buckets.
277210299Sed  Buckets = AllocateBuckets(NumBuckets);
278210299Sed  NumNodes = 0;
279193323Sed
280193323Sed  // Walk the old buckets, rehashing nodes into their new place.
281212904Sdim  FoldingSetNodeID TempID;
282193323Sed  for (unsigned i = 0; i != OldNumBuckets; ++i) {
283193323Sed    void *Probe = OldBuckets[i];
284193323Sed    if (!Probe) continue;
285193323Sed    while (Node *NodeInBucket = GetNextPtr(Probe)) {
286193323Sed      // Figure out the next link, remove NodeInBucket from the old link.
287193323Sed      Probe = NodeInBucket->getNextInBucket();
288276479Sdim      NodeInBucket->SetNextInBucket(nullptr);
289193323Sed
290193323Sed      // Insert the node into the new bucket, after recomputing the hash.
291212904Sdim      InsertNode(NodeInBucket,
292212904Sdim                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
293212904Sdim                              Buckets, NumBuckets));
294212904Sdim      TempID.clear();
295193323Sed    }
296193323Sed  }
297193323Sed
298210299Sed  free(OldBuckets);
299193323Sed}
300193323Sed
301193323Sed/// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
302193323Sed/// return it.  If not, return the insertion token that will make insertion
303193323Sed/// faster.
304193323SedFoldingSetImpl::Node
305193323Sed*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
306193323Sed                                     void *&InsertPos) {
307234353Sdim  unsigned IDHash = ID.ComputeHash();
308234353Sdim  void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
309193323Sed  void *Probe = *Bucket;
310193323Sed
311276479Sdim  InsertPos = nullptr;
312193323Sed
313212904Sdim  FoldingSetNodeID TempID;
314193323Sed  while (Node *NodeInBucket = GetNextPtr(Probe)) {
315234353Sdim    if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
316193323Sed      return NodeInBucket;
317212904Sdim    TempID.clear();
318193323Sed
319193323Sed    Probe = NodeInBucket->getNextInBucket();
320193323Sed  }
321193323Sed
322193323Sed  // Didn't find the node, return null with the bucket as the InsertPos.
323193323Sed  InsertPos = Bucket;
324276479Sdim  return nullptr;
325193323Sed}
326193323Sed
327193323Sed/// InsertNode - Insert the specified node into the folding set, knowing that it
328193323Sed/// is not already in the map.  InsertPos must be obtained from
329193323Sed/// FindNodeOrInsertPos.
330193323Sedvoid FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
331276479Sdim  assert(!N->getNextInBucket());
332193323Sed  // Do we need to grow the hashtable?
333193323Sed  if (NumNodes+1 > NumBuckets*2) {
334193323Sed    GrowHashTable();
335212904Sdim    FoldingSetNodeID TempID;
336212904Sdim    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
337193323Sed  }
338193323Sed
339193323Sed  ++NumNodes;
340193323Sed
341193323Sed  /// The insert position is actually a bucket pointer.
342193323Sed  void **Bucket = static_cast<void**>(InsertPos);
343193323Sed
344193323Sed  void *Next = *Bucket;
345193323Sed
346193323Sed  // If this is the first insertion into this bucket, its next pointer will be
347193323Sed  // null.  Pretend as if it pointed to itself, setting the low bit to indicate
348193323Sed  // that it is a pointer to the bucket.
349276479Sdim  if (!Next)
350193323Sed    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
351193323Sed
352193323Sed  // Set the node's next pointer, and make the bucket point to the node.
353193323Sed  N->SetNextInBucket(Next);
354193323Sed  *Bucket = N;
355193323Sed}
356193323Sed
357193323Sed/// RemoveNode - Remove a node from the folding set, returning true if one was
358193323Sed/// removed or false if the node was not in the folding set.
359193323Sedbool FoldingSetImpl::RemoveNode(Node *N) {
360193323Sed  // Because each bucket is a circular list, we don't need to compute N's hash
361193323Sed  // to remove it.
362193323Sed  void *Ptr = N->getNextInBucket();
363276479Sdim  if (!Ptr) return false;  // Not in folding set.
364193323Sed
365193323Sed  --NumNodes;
366276479Sdim  N->SetNextInBucket(nullptr);
367193323Sed
368193323Sed  // Remember what N originally pointed to, either a bucket or another node.
369193323Sed  void *NodeNextPtr = Ptr;
370193323Sed
371193323Sed  // Chase around the list until we find the node (or bucket) which points to N.
372193323Sed  while (true) {
373193323Sed    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
374193323Sed      // Advance pointer.
375193323Sed      Ptr = NodeInBucket->getNextInBucket();
376193323Sed
377193323Sed      // We found a node that points to N, change it to point to N's next node,
378193323Sed      // removing N from the list.
379193323Sed      if (Ptr == N) {
380193323Sed        NodeInBucket->SetNextInBucket(NodeNextPtr);
381193323Sed        return true;
382193323Sed      }
383193323Sed    } else {
384193323Sed      void **Bucket = GetBucketPtr(Ptr);
385193323Sed      Ptr = *Bucket;
386193323Sed
387193323Sed      // If we found that the bucket points to N, update the bucket to point to
388193323Sed      // whatever is next.
389193323Sed      if (Ptr == N) {
390193323Sed        *Bucket = NodeNextPtr;
391193323Sed        return true;
392193323Sed      }
393193323Sed    }
394193323Sed  }
395193323Sed}
396193323Sed
397193323Sed/// GetOrInsertNode - If there is an existing simple Node exactly
398193323Sed/// equal to the specified node, return it.  Otherwise, insert 'N' and it
399193323Sed/// instead.
400193323SedFoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
401193323Sed  FoldingSetNodeID ID;
402212904Sdim  GetNodeProfile(N, ID);
403193323Sed  void *IP;
404193323Sed  if (Node *E = FindNodeOrInsertPos(ID, IP))
405193323Sed    return E;
406193323Sed  InsertNode(N, IP);
407193323Sed  return N;
408193323Sed}
409193323Sed
410193323Sed//===----------------------------------------------------------------------===//
411193323Sed// FoldingSetIteratorImpl Implementation
412193323Sed
413193323SedFoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
414193323Sed  // Skip to the first non-null non-self-cycle bucket.
415193323Sed  while (*Bucket != reinterpret_cast<void*>(-1) &&
416276479Sdim         (!*Bucket || !GetNextPtr(*Bucket)))
417193323Sed    ++Bucket;
418193323Sed
419193323Sed  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
420193323Sed}
421193323Sed
422193323Sedvoid FoldingSetIteratorImpl::advance() {
423193323Sed  // If there is another link within this bucket, go to it.
424193323Sed  void *Probe = NodePtr->getNextInBucket();
425193323Sed
426193323Sed  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
427193323Sed    NodePtr = NextNodeInBucket;
428193323Sed  else {
429193323Sed    // Otherwise, this is the last link in this bucket.
430193323Sed    void **Bucket = GetBucketPtr(Probe);
431193323Sed
432193323Sed    // Skip to the next non-null non-self-cycle bucket.
433193323Sed    do {
434193323Sed      ++Bucket;
435193323Sed    } while (*Bucket != reinterpret_cast<void*>(-1) &&
436276479Sdim             (!*Bucket || !GetNextPtr(*Bucket)));
437193323Sed
438193323Sed    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
439193323Sed  }
440193323Sed}
441193323Sed
442193323Sed//===----------------------------------------------------------------------===//
443193323Sed// FoldingSetBucketIteratorImpl Implementation
444193323Sed
445193323SedFoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
446276479Sdim  Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
447193323Sed}
448