LLVM 23.0.0git
Hashing.h
Go to the documentation of this file.
1//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the newly proposed standard C++ interfaces for hashing
10// arbitrary data and building hash functions for user-defined types. This
11// interface was originally proposed in N3333[1] and is currently under review
12// for inclusion in a future TR and/or standard.
13//
14// The primary interfaces provide are comprised of one type and three functions:
15//
16// -- 'hash_code' class is an opaque type representing the hash code for some
17// data. It is the intended product of hashing, and can be used to implement
18// hash tables, checksumming, and other common uses of hashes. It is not an
19// integer type (although it can be converted to one) because it is risky
20// to assume much about the internals of a hash_code. In particular, each
21// execution of the program has a high probability of producing a different
22// hash_code for a given input. Thus their values are not stable to save or
23// persist, and should only be used during the execution for the
24// construction of hashing datastructures.
25//
26// -- 'hash_value' is a function designed to be overloaded for each
27// user-defined type which wishes to be used within a hashing context. It
28// should be overloaded within the user-defined type's namespace and found
29// via ADL. Overloads for primitive types are provided by this library.
30//
31// -- 'hash_combine' and 'hash_combine_range' are functions designed to aid
32// programmers in easily and intuitively combining a set of data into
33// a single hash_code for their object. They should only logically be used
34// within the implementation of a 'hash_value' routine or similar context.
35//
36// 'hash_combine_range' hashes the byte stream of the range via xxh3. The
37// contiguous-array overload hashes the range in place; the iterator overload
38// materializes the byte stream into a 256-byte on-stack buffer, falling back
39// to the heap for ranges that exceed it.
40//
41//===----------------------------------------------------------------------===//
42
43#ifndef LLVM_ADT_HASHING_H
44#define LLVM_ADT_HASHING_H
45
46#include "llvm/ADT/ADL.h"
47#include "llvm/Config/abi-breaking.h"
52#include "llvm/Support/xxhash.h"
53#include <algorithm>
54#include <array>
55#include <cassert>
56#include <cstring>
57#include <memory>
58#include <optional>
59#include <string>
60#include <tuple>
61#include <utility>
62
63namespace llvm {
64template <typename T, typename Enable> struct DenseMapInfo;
65
66/// An opaque object representing a hash code.
67///
68/// This object represents the result of hashing some entity. It is intended to
69/// be used to implement hashtables or other hashing-based data structures.
70/// While it wraps and exposes a numeric value, this value should not be
71/// trusted to be stable or predictable across processes or executions.
72///
73/// In order to obtain the hash_code for an object 'x':
74/// \code
75/// using llvm::hash_value;
76/// llvm::hash_code code = hash_value(x);
77/// \endcode
78class hash_code {
79 size_t value;
80
81public:
82 /// Default construct a hash_code.
83 /// Note that this leaves the value uninitialized.
84 hash_code() = default;
85
86 /// Form a hash code directly from a numerical value.
87 constexpr hash_code(size_t value) : value(value) {}
88
89 /// Convert the hash code to its numerical value for use.
90 /*explicit*/ constexpr operator size_t() const { return value; }
91
92 friend constexpr bool operator==(const hash_code &lhs, const hash_code &rhs) {
93 return lhs.value == rhs.value;
94 }
95 friend constexpr bool operator!=(const hash_code &lhs, const hash_code &rhs) {
96 return lhs.value != rhs.value;
97 }
98
99 /// Allow a hash_code to be directly run through hash_value.
100 friend constexpr size_t hash_value(const hash_code &code) {
101 return code.value;
102 }
103};
104
105/// Compute a hash_code for any integer value.
106///
107/// Note that this function is intended to compute the same hash_code for
108/// a particular value without regard to the pre-promotion type. This is in
109/// contrast to hash_combine which may produce different hash_codes for
110/// differing argument types even if they would implicit promote to a common
111/// type without changing the value.
112template <typename T>
113std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value);
114
115/// Compute a hash_code for a pointer's address.
116///
117/// N.B.: This hashes the *address*. Not the value and not the type.
118template <typename T> hash_code hash_value(const T *ptr);
119
120/// Compute a hash_code for a pair of objects.
121template <typename T, typename U>
122hash_code hash_value(const std::pair<T, U> &arg);
123
124/// Compute a hash_code for a tuple.
125template <typename... Ts>
126hash_code hash_value(const std::tuple<Ts...> &arg);
127
128/// Compute a hash_code for a standard string.
129template <typename T>
130hash_code hash_value(const std::basic_string<T> &arg);
131
132/// Compute a hash_code for a standard string.
133template <typename T> hash_code hash_value(const std::optional<T> &arg);
134
135// All of the implementation details of actually computing the various hash
136// code values are held within this namespace. These routines are included in
137// the header file mainly to allow inlining and constant propagation.
138namespace hashing {
139namespace detail {
140
141inline uint32_t fetch32(const char *p) {
142 uint32_t result;
143 std::memcpy(&result, p, sizeof(result));
145 sys::swapByteOrder(result);
146 return result;
147}
148
150 // Murmur-inspired hashing.
151 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
152 uint64_t a = (low ^ high) * kMul;
153 a ^= (a >> 47);
154 uint64_t b = (high ^ a) * kMul;
155 b ^= (b >> 47);
156 b *= kMul;
157 return b;
158}
159
160/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
161/// per process (address of a function in LLVMSupport) to prevent having users
162/// depend on the particular hash values. On platforms without ASLR, this is
163/// still likely non-deterministic per build.
165#if LLVM_ENABLE_ABI_BREAKING_CHECKS
166 return static_cast<uint64_t>(
167 reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
168#else
169 return 0xff51afd7ed558ccdULL;
170#endif
171}
172
173/// Hash a contiguous byte buffer to a hash_code. The execution seed is XORed
174/// into the result (not propagated through the avalanche), so a given byte
175/// stream produces the same xxh3 output modulo the per-process seed.
176//
177// TODO: post-XOR allows `hash_combine(x) ^ hash_combine(y)` to cancel the
178// process seed. Follow-up: add a seeded xxh3 entry in
179// llvm/lib/Support/xxhash.cpp.
180inline hash_code combine_bytes(const char *data, size_t len) {
181 return xxh3_64bits(reinterpret_cast<const uint8_t *>(data), len) ^
183}
184
185/// Trait to indicate whether a type's bits can be hashed directly.
186///
187/// A type trait which is true if we want to combine values for hashing by
188/// reading the underlying data. It is false if values of this type must
189/// first be passed to hash_value, and the resulting hash_codes combined.
190//
191// FIXME: We want to replace is_integral_or_enum and is_pointer here with
192// a predicate which asserts that comparing the underlying storage of two
193// values of the type for equality is equivalent to comparing the two values
194// for equality. For all the platforms we care about, this holds for integers
195// and pointers, but there are platforms where it doesn't and we would like to
196// support user-defined types which happen to satisfy this property.
197template <typename T>
198struct is_hashable_data : std::bool_constant<((is_integral_or_enum<T>::value ||
199 std::is_pointer<T>::value) &&
200 64 % sizeof(T) == 0)> {};
201
202// Special case std::pair to detect when both types are viable and when there
203// is no alignment-derived padding in the pair. This is a bit of a lie because
204// std::pair isn't truly POD, but it's close enough in all reasonable
205// implementations for our use case of hashing the underlying data.
206template <typename T, typename U>
207struct is_hashable_data<std::pair<T, U>>
208 : std::bool_constant<(is_hashable_data<T>::value &&
209 is_hashable_data<U>::value &&
210 (sizeof(T) + sizeof(U)) == sizeof(std::pair<T, U>))> {
211};
212
213/// Helper to get the hashable data representation for a type.
214template <typename T> auto get_hashable_data(const T &value) {
215 if constexpr (is_hashable_data<T>::value) {
216 // This variant is enabled when the type itself can be used.
217 return value;
218 } else {
219 // This variant is enabled when we must first call hash_value and use the
220 // result as our data.
221 using ::llvm::hash_value;
222 return static_cast<size_t>(hash_value(value));
223 }
224}
225
226/// Implement the combining of integral values into a hash_code.
227///
228/// This overload is selected when the value type of the iterator is
229/// integral. Rather than computing a hash_code for each object and then
230/// combining them, this (as an optimization) directly combines the integers.
231///
232/// xxh3 has no streaming entry point in libLLVMSupport, so the byte stream is
233/// flattened to a buffer and hashed in one shot. The 256-byte on-stack buffer
234/// holds 32 pointer-sized values, which covers virtually all in-tree
235/// non-contiguous callers. The prior chunked CityHash impl streamed and never
236/// allocated.
237template <typename InputIteratorT>
238hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
239 alignas(uint64_t) char stack_buf[256];
240 std::unique_ptr<char[]> heap_buf;
241 char *buf = stack_buf;
242 size_t cap = sizeof(stack_buf);
243 size_t len = 0;
244 for (; first != last; ++first) {
245 auto data = get_hashable_data(*first);
246 if (len + sizeof(data) > cap) {
247 size_t new_cap = cap * 2;
248 while (new_cap < len + sizeof(data))
249 new_cap *= 2;
250 // `new char[]` default-initializes (no zero-fill); make_unique would
251 // value-initialize, which is wasted work for a buffer about to be
252 // overwritten.
253 std::unique_ptr<char[]> new_buf(new char[new_cap]);
254 std::memcpy(new_buf.get(), buf, len);
255 heap_buf = std::move(new_buf);
256 buf = heap_buf.get();
257 cap = new_cap;
258 }
259 std::memcpy(buf + len, &data, sizeof(data));
260 len += sizeof(data);
261 }
262 return combine_bytes(buf, len);
263}
264
265/// Implement the combining of integral values into a hash_code.
266///
267/// This overload is selected when the value type of the iterator is integral
268/// and when the input iterator is actually a pointer. Rather than computing
269/// a hash_code for each object and then combining them, this (as an
270/// optimization) directly combines the integers. Also, because the integers
271/// are stored in contiguous memory, this routine avoids copying each value
272/// and directly reads from the underlying memory.
273template <typename ValueT>
274std::enable_if_t<is_hashable_data<ValueT>::value, hash_code>
275hash_combine_range_impl(ValueT *first, ValueT *last) {
276 return combine_bytes(reinterpret_cast<const char *>(first),
277 size_t(last - first) * sizeof(ValueT));
278}
279
280/// Sum of `sizeof(get_hashable_data(arg))` across a parameter pack.
281template <typename... Ts> constexpr size_t total_hashable_size() {
282 return (size_t(0) + ... +
283 sizeof(decltype(get_hashable_data(std::declval<Ts>()))));
284}
285
286/// Copy `get_hashable_data(arg)` into `buf` at offset `off`, advancing `off`.
287template <typename T>
288inline void store_hashable_data(char *buf, size_t &off, const T &arg) {
289 auto data = get_hashable_data(arg);
290 std::memcpy(buf + off, &data, sizeof(data));
291 off += sizeof(data);
292}
293
294} // namespace detail
295} // namespace hashing
296
297
298/// Compute a hash_code for a sequence of values.
299///
300/// This hashes a sequence of values. It produces the same hash_code as
301/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences
302/// and is significantly faster given pointers and types which can be hashed as
303/// a sequence of bytes.
304template <typename InputIteratorT>
305hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) {
306 return ::llvm::hashing::detail::hash_combine_range_impl(first, last);
307}
308
309// A wrapper for hash_combine_range above.
310template <typename RangeT> hash_code hash_combine_range(RangeT &&R) {
311 return hash_combine_range(adl_begin(R), adl_end(R));
312}
313
314/// Combine values into a single hash_code.
315///
316/// This routine accepts a varying number of arguments of any type. It will
317/// attempt to combine them into a single hash_code. For user-defined types it
318/// attempts to call a \see hash_value overload (via ADL) for the type. For
319/// integer and pointer types it directly combines their data into the
320/// resulting hash_code.
321///
322/// The result is suitable for returning from a user's hash_value
323/// *implementation* for their user-defined type. Consumers of a type should
324/// *not* call this routine, they should instead call 'hash_value'.
325template <typename... Ts> hash_code hash_combine(const Ts &...args) {
326 constexpr size_t Total = hashing::detail::total_hashable_size<Ts...>();
327 // Round up so `data()` is non-null when Total == 0; combine_bytes won't
328 // read the buffer in that case (len=0 short-circuits in xxh3_64bits).
329 std::array<char, std::max<size_t>(1, Total)> buf;
330 size_t off = 0;
331 (hashing::detail::store_hashable_data(buf.data(), off, args), ...);
332 return hashing::detail::combine_bytes(buf.data(), Total);
333}
334
335// Implementation details for implementations of hash_value overloads provided
336// here.
337namespace hashing {
338namespace detail {
339
340/// Helper to hash the value of a single integer.
341///
342/// Overloads for smaller integer types are not provided to ensure consistent
343/// behavior in the presence of integral promotions. Essentially,
344/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
346 // Similar to hash_4to8_bytes but using a seed instead of length.
347 const uint64_t seed = get_execution_seed();
348 const char *s = reinterpret_cast<const char *>(&value);
349 const uint64_t a = fetch32(s);
350 return hash_16_bytes(seed + (a << 3), fetch32(s + 4));
351}
352
353} // namespace detail
354} // namespace hashing
355
356// Declared and documented above, but defined here so that any of the hashing
357// infrastructure is available.
358template <typename T>
359std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value) {
360 return ::llvm::hashing::detail::hash_integer_value(
361 static_cast<uint64_t>(value));
362}
363
364// Declared and documented above, but defined here so that any of the hashing
365// infrastructure is available.
366template <typename T> hash_code hash_value(const T *ptr) {
367 return ::llvm::hashing::detail::hash_integer_value(
368 reinterpret_cast<uintptr_t>(ptr));
369}
370
371// Declared and documented above, but defined here so that any of the hashing
372// infrastructure is available.
373template <typename T, typename U>
374hash_code hash_value(const std::pair<T, U> &arg) {
375 return hash_combine(arg.first, arg.second);
376}
377
378template <typename... Ts> hash_code hash_value(const std::tuple<Ts...> &arg) {
379 return std::apply([](const auto &...xs) { return hash_combine(xs...); }, arg);
380}
381
382// Declared and documented above, but defined here so that any of the hashing
383// infrastructure is available.
384template <typename T>
385hash_code hash_value(const std::basic_string<T> &arg) {
386 return hash_combine_range(arg);
387}
388
389template <typename T> hash_code hash_value(const std::optional<T> &arg) {
390 return arg ? hash_combine(true, *arg) : hash_value(false);
391}
392
393template <> struct DenseMapInfo<hash_code, void> {
394 static constexpr hash_code getEmptyKey() { return hash_code(-1); }
395 static constexpr hash_code getTombstoneKey() { return hash_code(-2); }
396 static constexpr unsigned getHashValue(hash_code val) {
397 return static_cast<unsigned>(size_t(val));
398 }
399 static constexpr bool isEqual(hash_code LHS, hash_code RHS) {
400 return LHS == RHS;
401 }
402};
403
404} // namespace llvm
405
406/// Implement std::hash so that hash_code can be used in STL containers.
407namespace std {
408
409template<>
410struct hash<llvm::hash_code> {
411 constexpr size_t operator()(llvm::hash_code const &Val) const { return Val; }
412};
413
414} // namespace std;
415
416#endif
#define T
nvptx lower args
static Split data
Value * RHS
Value * LHS
An opaque object representing a hash code.
Definition Hashing.h:78
constexpr hash_code(size_t value)
Form a hash code directly from a numerical value.
Definition Hashing.h:87
friend constexpr size_t hash_value(const hash_code &code)
Allow a hash_code to be directly run through hash_value.
Definition Hashing.h:100
hash_code()=default
Default construct a hash_code.
friend constexpr bool operator==(const hash_code &lhs, const hash_code &rhs)
Definition Hashing.h:92
friend constexpr bool operator!=(const hash_code &lhs, const hash_code &rhs)
Definition Hashing.h:95
hash_code combine_bytes(const char *data, size_t len)
Hash a contiguous byte buffer to a hash_code.
Definition Hashing.h:180
constexpr uint64_t hash_16_bytes(uint64_t low, uint64_t high)
Definition Hashing.h:149
constexpr size_t total_hashable_size()
Sum of sizeof(get_hashable_data(arg)) across a parameter pack.
Definition Hashing.h:281
hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last)
Implement the combining of integral values into a hash_code.
Definition Hashing.h:238
hash_code hash_integer_value(uint64_t value)
Helper to hash the value of a single integer.
Definition Hashing.h:345
auto get_hashable_data(const T &value)
Helper to get the hashable data representation for a type.
Definition Hashing.h:214
uint64_t get_execution_seed()
In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic per process (address of a fu...
Definition Hashing.h:164
uint32_t fetch32(const char *p)
Definition Hashing.h:141
void store_hashable_data(char *buf, size_t &off, const T &arg)
Copy get_hashable_data(arg) into buf at offset off, advancing off.
Definition Hashing.h:288
constexpr bool IsBigEndianHost
void swapByteOrder(T &Value)
This is an optimization pass for GlobalISel generic memory operations.
hash_code hash_value(const FixedPointSemantics &Val)
constexpr auto adl_begin(RangeT &&range) -> decltype(adl_detail::begin_impl(std::forward< RangeT >(range)))
Returns the begin iterator to range using std::begin and function found through Argument-Dependent Lo...
Definition ADL.h:78
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Inline ArrayRef overloads of the xxhash entry points declared out-of-line in llvm/Support/xxhash....
Definition ArrayRef.h:558
LLVM_ABI void install_fatal_error_handler(fatal_error_handler_t handler, void *user_data=nullptr)
install_fatal_error_handler - Installs a new error handler to be used whenever a serious (non-recover...
constexpr auto adl_end(RangeT &&range) -> decltype(adl_detail::end_impl(std::forward< RangeT >(range)))
Returns the end iterator to range using std::end and functions found through Argument-Dependent Looku...
Definition ADL.h:86
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
static constexpr bool isEqual(hash_code LHS, hash_code RHS)
Definition Hashing.h:399
static constexpr hash_code getEmptyKey()
Definition Hashing.h:394
static constexpr unsigned getHashValue(hash_code val)
Definition Hashing.h:396
static constexpr hash_code getTombstoneKey()
Definition Hashing.h:395
An information struct used to provide DenseMap with the various necessary components for a given valu...
Trait to indicate whether a type's bits can be hashed directly.
Definition Hashing.h:200
constexpr size_t operator()(llvm::hash_code const &Val) const
Definition Hashing.h:411