LLVM 23.0.0git
FoldingSet.cpp
Go to the documentation of this file.
1//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a hash set that can be used to remove duplication of
10// nodes in a graph.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/FoldingSet.h"
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/StringRef.h"
21#include <cassert>
22#include <cstring>
23using namespace llvm;
24
25//===----------------------------------------------------------------------===//
26// FoldingSetNodeIDRef Implementation
27
29 if (Size != RHS.Size) return false;
30 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
31}
32
33/// Used to compare the "ordering" of two nodes as defined by the
34/// profiled bits and their ordering defined by memcmp().
36 if (Size != RHS.Size)
37 return Size < RHS.Size;
38 return memcmp(Data, RHS.Data, Size*sizeof(*Data)) < 0;
39}
40
41//===----------------------------------------------------------------------===//
42// FoldingSetNodeID Implementation
43
44/// Add* - Add various data types to Bit data.
45///
47 unsigned Size = String.size();
48
49 unsigned NumInserts = 1 + divideCeil(Size, 4);
50 Bits.reserve(Bits.size() + NumInserts);
51
52 Bits.push_back(Size);
53 if (!Size) return;
54
55 unsigned Units = Size / 4;
56 unsigned Pos = 0;
57 const unsigned *Base = (const unsigned*) String.data();
58
59 // If the string is aligned do a bulk transfer.
60 if (!((intptr_t)Base & 3)) {
61 Bits.append(Base, Base + Units);
62 Pos = (Units + 1) * 4;
63 } else {
64 // Otherwise do it the hard way.
65 // To be compatible with above bulk transfer, we need to take endianness
66 // into account.
68 "Unexpected host endianness");
70 for (Pos += 4; Pos <= Size; Pos += 4) {
71 unsigned V = ((unsigned char)String[Pos - 4] << 24) |
72 ((unsigned char)String[Pos - 3] << 16) |
73 ((unsigned char)String[Pos - 2] << 8) |
74 (unsigned char)String[Pos - 1];
75 Bits.push_back(V);
76 }
77 } else { // Little-endian host
78 for (Pos += 4; Pos <= Size; Pos += 4) {
79 unsigned V = ((unsigned char)String[Pos - 1] << 24) |
80 ((unsigned char)String[Pos - 2] << 16) |
81 ((unsigned char)String[Pos - 3] << 8) |
82 (unsigned char)String[Pos - 4];
83 Bits.push_back(V);
84 }
85 }
86 }
87
88 // With the leftover bits.
89 unsigned V = 0;
90 // Pos will have overshot size by 4 - #bytes left over.
91 // No need to take endianness into account here - this is always executed.
92 switch (Pos - Size) {
93 case 1: V = (V << 8) | (unsigned char)String[Size - 3]; [[fallthrough]];
94 case 2: V = (V << 8) | (unsigned char)String[Size - 2]; [[fallthrough]];
95 case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
96 default: return; // Nothing left.
97 }
98
99 Bits.push_back(V);
100}
101
102// AddNodeID - Adds the Bit data of another ID to *this.
104 Bits.append(ID.Bits.begin(), ID.Bits.end());
105}
106
107/// operator== - Used to compare two nodes to each other.
108///
110 return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
111}
112
113/// operator== - Used to compare two nodes to each other.
114///
116 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
117}
118
119/// Used to compare the "ordering" of two nodes as defined by the
120/// profiled bits and their ordering defined by memcmp().
122 return *this < FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
123}
124
126 return FoldingSetNodeIDRef(Bits.data(), Bits.size()) < RHS;
127}
128
129/// Intern - Copy this node's data to a memory region allocated from the
130/// given allocator and return a FoldingSetNodeIDRef describing the
131/// interned data.
134 unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
135 llvm::uninitialized_copy(Bits, New);
136 return FoldingSetNodeIDRef(New, Bits.size());
137}
138
139//===----------------------------------------------------------------------===//
140/// Helper functions for FoldingSetBase.
141
142/// GetNextPtr - In order to save space, each bucket is a
143/// singly-linked-list. In order to make deletion more efficient, we make
144/// the list circular, so we can delete a node without computing its hash.
145/// The problem with this is that the start of the hash buckets are not
146/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
147/// use GetBucketPtr when this happens.
148static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
149 // The low bit is set if this is the pointer back to the bucket.
150 if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
151 return nullptr;
152
153 return static_cast<FoldingSetBase::Node*>(NextInBucketPtr);
154}
155
156
157/// testing.
158static void **GetBucketPtr(void *NextInBucketPtr) {
159 intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
160 assert((Ptr & 1) && "Not a bucket pointer");
161 return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
162}
163
164/// GetBucketFor - Hash the specified node ID and return the hash bucket for
165/// the specified ID.
166static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
167 // NumBuckets is always a power of 2.
168 unsigned BucketNum = Hash & (NumBuckets-1);
169 return Buckets + BucketNum;
170}
171
172/// AllocateBuckets - Allocated initialized bucket memory.
173static void **AllocateBuckets(unsigned NumBuckets) {
174 void **Buckets = static_cast<void**>(safe_calloc(NumBuckets + 1,
175 sizeof(void*)));
176 // Set the very last bucket to be a non-null "pointer".
177 Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
178 return Buckets;
179}
180
181//===----------------------------------------------------------------------===//
182// FoldingSetBase Implementation
183
184FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) {
185 assert(5 < Log2InitSize && Log2InitSize < 32 &&
186 "Initial hash table size out of range");
187 NumBuckets = 1 << Log2InitSize;
189 NumNodes = 0;
190}
191
194 Arg.Buckets = nullptr;
195 Arg.NumBuckets = 0;
196 Arg.NumNodes = 0;
197}
198
200 free(Buckets); // This may be null if the set is in a moved-from state.
201 Buckets = RHS.Buckets;
202 NumBuckets = RHS.NumBuckets;
203 NumNodes = RHS.NumNodes;
204 RHS.Buckets = nullptr;
205 RHS.NumBuckets = 0;
206 RHS.NumNodes = 0;
207 return *this;
208}
209
213
215 // Set all but the last bucket to null pointers.
216 memset(Buckets, 0, NumBuckets*sizeof(void*));
217
218 // Set the very last bucket to be a non-null "pointer".
219 Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
220
221 // Reset the node count to zero.
222 NumNodes = 0;
223}
224
225void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount,
226 const FoldingSetInfo &Info) {
227 assert((NewBucketCount > NumBuckets) &&
228 "Can't shrink a folding set with GrowBucketCount");
229 assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!");
230 void **OldBuckets = Buckets;
231 unsigned OldNumBuckets = NumBuckets;
232
233 // Clear out new buckets.
234 Buckets = AllocateBuckets(NewBucketCount);
235 // Set NumBuckets only if allocation of new buckets was successful.
236 NumBuckets = NewBucketCount;
237 NumNodes = 0;
238
239 // Walk the old buckets, rehashing nodes into their new place.
240 FoldingSetNodeID TempID;
241 for (unsigned i = 0; i != OldNumBuckets; ++i) {
242 void *Probe = OldBuckets[i];
243 if (!Probe) continue;
244 while (Node *NodeInBucket = GetNextPtr(Probe)) {
245 // Figure out the next link, remove NodeInBucket from the old link.
246 Probe = NodeInBucket->getNextInBucket();
247 NodeInBucket->SetNextInBucket(nullptr);
248
249 // Insert the node into the new bucket, after recomputing the hash.
250 InsertNode(NodeInBucket,
251 GetBucketFor(Info.ComputeNodeHash(this, NodeInBucket, TempID),
253 Info);
254 TempID.clear();
255 }
256 }
257
258 free(OldBuckets);
259}
260
261/// GrowHashTable - Double the size of the hash table and rehash everything.
262///
263void FoldingSetBase::GrowHashTable(const FoldingSetInfo &Info) {
264 GrowBucketCount(NumBuckets * 2, Info);
265}
266
267void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
268 // This will give us somewhere between EltCount / 2 and
269 // EltCount buckets. This puts us in the load factor
270 // range of 1.0 - 2.0.
271 if(EltCount < capacity())
272 return;
273 GrowBucketCount(llvm::bit_floor(EltCount), Info);
274}
275
276/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
277/// return it. If not, return the insertion token that will make insertion
278/// faster.
280 const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info) {
281 unsigned IDHash = ID.ComputeHash();
282 void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
283 void *Probe = *Bucket;
284
285 InsertPos = nullptr;
286
287 FoldingSetNodeID TempID;
288 while (Node *NodeInBucket = GetNextPtr(Probe)) {
289 if (Info.NodeEquals(this, NodeInBucket, ID, IDHash, TempID))
290 return NodeInBucket;
291 TempID.clear();
292
293 Probe = NodeInBucket->getNextInBucket();
294 }
295
296 // Didn't find the node, return null with the bucket as the InsertPos.
297 InsertPos = Bucket;
298 return nullptr;
299}
300
301/// InsertNode - Insert the specified node into the folding set, knowing that it
302/// is not already in the map. InsertPos must be obtained from
303/// FindNodeOrInsertPos.
304void FoldingSetBase::InsertNode(Node *N, void *InsertPos,
305 const FoldingSetInfo &Info) {
306 assert(!N->getNextInBucket());
307 // Do we need to grow the hashtable?
308 if (NumNodes+1 > capacity()) {
309 GrowHashTable(Info);
310 FoldingSetNodeID TempID;
311 InsertPos = GetBucketFor(Info.ComputeNodeHash(this, N, TempID), Buckets,
312 NumBuckets);
313 }
314
315 ++NumNodes;
316
317 /// The insert position is actually a bucket pointer.
318 void **Bucket = static_cast<void**>(InsertPos);
319
320 void *Next = *Bucket;
321
322 // If this is the first insertion into this bucket, its next pointer will be
323 // null. Pretend as if it pointed to itself, setting the low bit to indicate
324 // that it is a pointer to the bucket.
325 if (!Next)
326 Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
327
328 // Set the node's next pointer, and make the bucket point to the node.
329 N->SetNextInBucket(Next);
330 *Bucket = N;
331}
332
333/// RemoveNode - Remove a node from the folding set, returning true if one was
334/// removed or false if the node was not in the folding set.
336 // Because each bucket is a circular list, we don't need to compute N's hash
337 // to remove it.
338 void *Ptr = N->getNextInBucket();
339 if (!Ptr) return false; // Not in folding set.
340
341 --NumNodes;
342 N->SetNextInBucket(nullptr);
343
344 // Remember what N originally pointed to, either a bucket or another node.
345 void *NodeNextPtr = Ptr;
346
347 // Chase around the list until we find the node (or bucket) which points to N.
348 while (true) {
349 if (Node *NodeInBucket = GetNextPtr(Ptr)) {
350 // Advance pointer.
351 Ptr = NodeInBucket->getNextInBucket();
352
353 // We found a node that points to N, change it to point to N's next node,
354 // removing N from the list.
355 if (Ptr == N) {
356 NodeInBucket->SetNextInBucket(NodeNextPtr);
357 return true;
358 }
359 } else {
360 void **Bucket = GetBucketPtr(Ptr);
361 Ptr = *Bucket;
362
363 // If we found that the bucket points to N, update the bucket to point to
364 // whatever is next.
365 if (Ptr == N) {
366 *Bucket = NodeNextPtr;
367 return true;
368 }
369 }
370 }
371}
372
373/// GetOrInsertNode - If there is an existing simple Node exactly
374/// equal to the specified node, return it. Otherwise, insert 'N' and it
375/// instead.
378 const FoldingSetInfo &Info) {
380 Info.GetNodeProfile(this, N, ID);
381 void *IP;
382 if (Node *E = FindNodeOrInsertPos(ID, IP, Info))
383 return E;
384 InsertNode(N, IP, Info);
385 return N;
386}
387
388//===----------------------------------------------------------------------===//
389// FoldingSetIteratorImpl Implementation
390
392 // Skip to the first non-null non-self-cycle bucket.
393 while (*Bucket != reinterpret_cast<void*>(-1) &&
394 (!*Bucket || !GetNextPtr(*Bucket)))
395 ++Bucket;
396
397 NodePtr = static_cast<FoldingSetNode*>(*Bucket);
398}
399
401 // If there is another link within this bucket, go to it.
402 void *Probe = NodePtr->getNextInBucket();
403
404 if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
405 NodePtr = NextNodeInBucket;
406 else {
407 // Otherwise, this is the last link in this bucket.
408 void **Bucket = GetBucketPtr(Probe);
409
410 // Skip to the next non-null non-self-cycle bucket.
411 do {
412 ++Bucket;
413 } while (*Bucket != reinterpret_cast<void*>(-1) &&
414 (!*Bucket || !GetNextPtr(*Bucket)));
415
416 NodePtr = static_cast<FoldingSetNode*>(*Bucket);
417 }
418}
419
420//===----------------------------------------------------------------------===//
421// FoldingSetBucketIteratorImpl Implementation
422
424 Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
425}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the BumpPtrAllocator interface.
static void ** GetBucketPtr(void *NextInBucketPtr)
testing.
static void ** GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets)
GetBucketFor - Hash the specified node ID and return the hash bucket for the specified ID.
static void ** AllocateBuckets(unsigned NumBuckets)
AllocateBuckets - Allocated initialized bucket memory.
static FoldingSetBase::Node * GetNextPtr(void *NextInBucketPtr)
Helper functions for FoldingSetBase.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
This file contains some templates that are useful if you are working with the STL at all.
This class is used to maintain the singly linked bucket list in a folding set.
Definition FoldingSet.h:313
LLVM_ABI FoldingSetBase(unsigned Log2InitSize=6)
void ** Buckets
Array of bucket chains.
Definition FoldingSet.h:295
LLVM_ABI void reserve(unsigned EltCount, const FoldingSetInfo &Info)
Increase the number of buckets such that adding the EltCount th node won't cause a rebucket operation...
LLVM_ABI bool RemoveNode(Node *N)
Remove a node from the folding set, returning true if one was removed or false if the node was not in...
LLVM_ABI FoldingSetBase & operator=(FoldingSetBase &&RHS)
LLVM_ABI ~FoldingSetBase()
unsigned NumBuckets
Length of the Buckets array. Always a power of 2.
Definition FoldingSet.h:298
unsigned NumNodes
Number of nodes in the folding set.
Definition FoldingSet.h:302
unsigned capacity()
Returns the number of nodes permitted in the folding set before a rebucket operation is performed.
Definition FoldingSet.h:337
LLVM_ABI Node * GetOrInsertNode(Node *N, const FoldingSetInfo &Info)
If there is an existing simple Node exactly equal to the node N, return it.
LLVM_ABI void InsertNode(Node *N, void *InsertPos, const FoldingSetInfo &Info)
Insert the specified node into the folding set, knowing that it is not already in the folding set.
LLVM_ABI void clear()
Remove all nodes from the folding set.
LLVM_ABI Node * FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos, const FoldingSetInfo &Info)
Look up the node specified by ID.
LLVM_ABI FoldingSetBucketIteratorImpl(void **Bucket)
LLVM_ABI FoldingSetIteratorImpl(void **Bucket)
This class describes a reference to an interned FoldingSetNodeID, which can be a useful to store node...
Definition FoldingSet.h:171
LLVM_ABI bool operator==(FoldingSetNodeIDRef) const
LLVM_ABI bool operator<(FoldingSetNodeIDRef) const
Used to compare the "ordering" of two nodes as defined by the profiled bits and their ordering define...
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
LLVM_ABI FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const
Copy this node's data to a memory region allocated from the given allocator and return a FoldingSetNo...
void clear()
Clear the accumulated profile, allowing this FoldingSetNodeID object to be used to compute a new prof...
Definition FoldingSet.h:252
LLVM_ABI bool operator==(const FoldingSetNodeID &RHS) const
operator== - Used to compare two nodes to each other.
LLVM_ABI bool operator<(const FoldingSetNodeID &RHS) const
Used to compare the "ordering" of two nodes as defined by the profiled bits and their ordering define...
LLVM_ABI void AddNodeID(const FoldingSetNodeID &ID)
LLVM_ABI void AddString(StringRef String)
Add* - Add various data types to Bit data.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
constexpr bool IsLittleEndianHost
constexpr bool IsBigEndianHost
This is an optimization pass for GlobalISel generic memory operations.
FoldingSetBase::Node FoldingSetNode
Definition FoldingSet.h:404
auto uninitialized_copy(R &&Src, IterTy Dst)
Definition STLExtras.h:2110
LLVM_ATTRIBUTE_RETURNS_NONNULL void * safe_calloc(size_t Count, size_t Sz)
Definition MemAlloc.h:38
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
#define N
Functions provided by the derived class to compute folding properties.
Definition FoldingSet.h:347