LLVM 23.0.0git
Hashing.h
Go to the documentation of this file.
1//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the newly proposed standard C++ interfaces for hashing
10// arbitrary data and building hash functions for user-defined types. This
11// interface was originally proposed in N3333[1] and is currently under review
12// for inclusion in a future TR and/or standard.
13//
14// The primary interfaces provide are comprised of one type and three functions:
15//
16// -- 'hash_code' class is an opaque type representing the hash code for some
17// data. It is the intended product of hashing, and can be used to implement
18// hash tables, checksumming, and other common uses of hashes. It is not an
19// integer type (although it can be converted to one) because it is risky
20// to assume much about the internals of a hash_code. In particular, each
21// execution of the program has a high probability of producing a different
22// hash_code for a given input. Thus their values are not stable to save or
23// persist, and should only be used during the execution for the
24// construction of hashing datastructures.
25//
26// -- 'hash_value' is a function designed to be overloaded for each
27// user-defined type which wishes to be used within a hashing context. It
28// should be overloaded within the user-defined type's namespace and found
29// via ADL. Overloads for primitive types are provided by this library.
30//
31// -- 'hash_combine' and 'hash_combine_range' are functions designed to aid
32// programmers in easily and intuitively combining a set of data into
33// a single hash_code for their object. They should only logically be used
34// within the implementation of a 'hash_value' routine or similar context.
35//
36// 'hash_combine_range' hashes the byte stream of the range via xxh3. The
37// contiguous-array overload hashes the range in place; the iterator overload
38// materializes the byte stream into a 256-byte on-stack buffer, falling back
39// to the heap for ranges that exceed it.
40//
41//===----------------------------------------------------------------------===//
42
43#ifndef LLVM_ADT_HASHING_H
44#define LLVM_ADT_HASHING_H
45
46#include "llvm/ADT/ADL.h"
47#include "llvm/Config/abi-breaking.h"
51#include "llvm/Support/xxhash.h"
52#include <algorithm>
53#include <array>
54#include <cassert>
55#include <cstring>
56#include <memory>
57#include <optional>
58#include <string>
59#include <tuple>
60#include <utility>
61
62namespace llvm {
63template <typename T, typename Enable> struct DenseMapInfo;
64
65/// An opaque object representing a hash code.
66///
67/// This object represents the result of hashing some entity. It is intended to
68/// be used to implement hashtables or other hashing-based data structures.
69/// While it wraps and exposes a numeric value, this value should not be
70/// trusted to be stable or predictable across processes or executions.
71///
72/// In order to obtain the hash_code for an object 'x':
73/// \code
74/// using llvm::hash_value;
75/// llvm::hash_code code = hash_value(x);
76/// \endcode
77class hash_code {
78 size_t value;
79
80public:
81 /// Default construct a hash_code.
82 /// Note that this leaves the value uninitialized.
83 hash_code() = default;
84
85 /// Form a hash code directly from a numerical value.
86 constexpr hash_code(size_t value) : value(value) {}
87
88 /// Convert the hash code to its numerical value for use.
89 /*explicit*/ constexpr operator size_t() const { return value; }
90
91 friend constexpr bool operator==(const hash_code &lhs, const hash_code &rhs) {
92 return lhs.value == rhs.value;
93 }
94 friend constexpr bool operator!=(const hash_code &lhs, const hash_code &rhs) {
95 return lhs.value != rhs.value;
96 }
97
98 /// Allow a hash_code to be directly run through hash_value.
99 friend constexpr size_t hash_value(const hash_code &code) {
100 return code.value;
101 }
102};
103
104/// Compute a hash_code for any integer value.
105///
106/// Note that this function is intended to compute the same hash_code for
107/// a particular value without regard to the pre-promotion type. This is in
108/// contrast to hash_combine which may produce different hash_codes for
109/// differing argument types even if they would implicit promote to a common
110/// type without changing the value.
111template <typename T>
112std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value);
113
114/// Compute a hash_code for a pointer's address.
115///
116/// N.B.: This hashes the *address*. Not the value and not the type.
117template <typename T> hash_code hash_value(const T *ptr);
118
119/// Compute a hash_code for a pair of objects.
120template <typename T, typename U>
121hash_code hash_value(const std::pair<T, U> &arg);
122
123/// Compute a hash_code for a tuple.
124template <typename... Ts>
125hash_code hash_value(const std::tuple<Ts...> &arg);
126
127/// Compute a hash_code for a standard string.
128template <typename T>
129hash_code hash_value(const std::basic_string<T> &arg);
130
131/// Compute a hash_code for a standard string.
132template <typename T> hash_code hash_value(const std::optional<T> &arg);
133
134// All of the implementation details of actually computing the various hash
135// code values are held within this namespace. These routines are included in
136// the header file mainly to allow inlining and constant propagation.
137namespace hashing {
138namespace detail {
139
140/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
141/// per process (address of a function in LLVMSupport) to prevent having users
142/// depend on the particular hash values. On platforms without ASLR, this is
143/// still likely non-deterministic per build.
145#if LLVM_ENABLE_ABI_BREAKING_CHECKS
146 return static_cast<uint64_t>(
147 reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
148#else
149 return 0xff51afd7ed558ccdULL;
150#endif
151}
152
153/// Hash a contiguous byte buffer to a hash_code. The execution seed is XORed
154/// into the result (not propagated through the avalanche), so a given byte
155/// stream produces the same xxh3 output modulo the per-process seed.
156//
157// TODO: post-XOR allows `hash_combine(x) ^ hash_combine(y)` to cancel the
158// process seed. Follow-up: add a seeded xxh3 entry in
159// llvm/lib/Support/xxhash.cpp.
160inline hash_code combine_bytes(const char *data, size_t len) {
161 return xxh3_64bits(reinterpret_cast<const uint8_t *>(data), len) ^
163}
164
165/// Trait to indicate whether a type's bits can be hashed directly.
166///
167/// A type trait which is true if we want to combine values for hashing by
168/// reading the underlying data. It is false if values of this type must
169/// first be passed to hash_value, and the resulting hash_codes combined.
170//
171// FIXME: We want to replace is_integral_or_enum and is_pointer here with
172// a predicate which asserts that comparing the underlying storage of two
173// values of the type for equality is equivalent to comparing the two values
174// for equality. For all the platforms we care about, this holds for integers
175// and pointers, but there are platforms where it doesn't and we would like to
176// support user-defined types which happen to satisfy this property.
177template <typename T>
178struct is_hashable_data : std::bool_constant<((is_integral_or_enum<T>::value ||
179 std::is_pointer<T>::value) &&
180 64 % sizeof(T) == 0)> {};
181
182// Special case std::pair to detect when both types are viable and when there
183// is no alignment-derived padding in the pair. This is a bit of a lie because
184// std::pair isn't truly POD, but it's close enough in all reasonable
185// implementations for our use case of hashing the underlying data.
186template <typename T, typename U>
187struct is_hashable_data<std::pair<T, U>>
188 : std::bool_constant<(is_hashable_data<T>::value &&
189 is_hashable_data<U>::value &&
190 (sizeof(T) + sizeof(U)) == sizeof(std::pair<T, U>))> {
191};
192
193/// Helper to get the hashable data representation for a type.
194template <typename T> auto get_hashable_data(const T &value) {
195 if constexpr (is_hashable_data<T>::value) {
196 // This variant is enabled when the type itself can be used.
197 return value;
198 } else {
199 // This variant is enabled when we must first call hash_value and use the
200 // result as our data.
201 using ::llvm::hash_value;
202 return static_cast<size_t>(hash_value(value));
203 }
204}
205
206/// Implement the combining of integral values into a hash_code.
207///
208/// This overload is selected when the value type of the iterator is
209/// integral. Rather than computing a hash_code for each object and then
210/// combining them, this (as an optimization) directly combines the integers.
211///
212/// xxh3 has no streaming entry point in libLLVMSupport, so the byte stream is
213/// flattened to a buffer and hashed in one shot. The 256-byte on-stack buffer
214/// holds 32 pointer-sized values, which covers virtually all in-tree
215/// non-contiguous callers. The prior chunked CityHash impl streamed and never
216/// allocated.
217template <typename InputIteratorT>
218hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
219 alignas(uint64_t) char stack_buf[256];
220 std::unique_ptr<char[]> heap_buf;
221 char *buf = stack_buf;
222 size_t cap = sizeof(stack_buf);
223 size_t len = 0;
224 for (; first != last; ++first) {
225 auto data = get_hashable_data(*first);
226 if (len + sizeof(data) > cap) {
227 size_t new_cap = cap * 2;
228 while (new_cap < len + sizeof(data))
229 new_cap *= 2;
230 // `new char[]` default-initializes (no zero-fill); make_unique would
231 // value-initialize, which is wasted work for a buffer about to be
232 // overwritten.
233 std::unique_ptr<char[]> new_buf(new char[new_cap]);
234 std::memcpy(new_buf.get(), buf, len);
235 heap_buf = std::move(new_buf);
236 buf = heap_buf.get();
237 cap = new_cap;
238 }
239 std::memcpy(buf + len, &data, sizeof(data));
240 len += sizeof(data);
241 }
242 return combine_bytes(buf, len);
243}
244
245/// Implement the combining of integral values into a hash_code.
246///
247/// This overload is selected when the value type of the iterator is integral
248/// and when the input iterator is actually a pointer. Rather than computing
249/// a hash_code for each object and then combining them, this (as an
250/// optimization) directly combines the integers. Also, because the integers
251/// are stored in contiguous memory, this routine avoids copying each value
252/// and directly reads from the underlying memory.
253template <typename ValueT>
254std::enable_if_t<is_hashable_data<ValueT>::value, hash_code>
255hash_combine_range_impl(ValueT *first, ValueT *last) {
256 return combine_bytes(reinterpret_cast<const char *>(first),
257 size_t(last - first) * sizeof(ValueT));
258}
259
260/// Sum of `sizeof(get_hashable_data(arg))` across a parameter pack.
261template <typename... Ts> constexpr size_t total_hashable_size() {
262 return (size_t(0) + ... +
263 sizeof(decltype(get_hashable_data(std::declval<Ts>()))));
264}
265
266/// Copy `get_hashable_data(arg)` into `buf` at offset `off`, advancing `off`.
267template <typename T>
268inline void store_hashable_data(char *buf, size_t &off, const T &arg) {
269 auto data = get_hashable_data(arg);
270 std::memcpy(buf + off, &data, sizeof(data));
271 off += sizeof(data);
272}
273
274} // namespace detail
275} // namespace hashing
276
277
278/// Compute a hash_code for a sequence of values.
279///
280/// This hashes a sequence of values. It produces the same hash_code as
281/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences
282/// and is significantly faster given pointers and types which can be hashed as
283/// a sequence of bytes.
284template <typename InputIteratorT>
285hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) {
286 return ::llvm::hashing::detail::hash_combine_range_impl(first, last);
287}
288
289// A wrapper for hash_combine_range above.
290template <typename RangeT> hash_code hash_combine_range(RangeT &&R) {
291 return hash_combine_range(adl_begin(R), adl_end(R));
292}
293
294/// Combine values into a single hash_code.
295///
296/// This routine accepts a varying number of arguments of any type. It will
297/// attempt to combine them into a single hash_code. For user-defined types it
298/// attempts to call a \see hash_value overload (via ADL) for the type. For
299/// integer and pointer types it directly combines their data into the
300/// resulting hash_code.
301///
302/// The result is suitable for returning from a user's hash_value
303/// *implementation* for their user-defined type. Consumers of a type should
304/// *not* call this routine, they should instead call 'hash_value'.
305template <typename... Ts> hash_code hash_combine(const Ts &...args) {
306 constexpr size_t Total = hashing::detail::total_hashable_size<Ts...>();
307 // Round up so `data()` is non-null when Total == 0; combine_bytes won't
308 // read the buffer in that case (len=0 short-circuits in xxh3_64bits).
309 std::array<char, std::max<size_t>(1, Total)> buf;
310 [[maybe_unused]] size_t off = 0;
311 (hashing::detail::store_hashable_data(buf.data(), off, args), ...);
312 return hashing::detail::combine_bytes(buf.data(), Total);
313}
314
315// Implementation details for implementations of hash_value overloads provided
316// here.
317namespace hashing {
318namespace detail {
319
320/// Helper to hash the value of a single integer.
321///
322/// Overloads for smaller integer types are not provided to ensure consistent
323/// behavior in the presence of integral promotions. Essentially,
324/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
326 // splitmix64 finalizer (xmxmx).
327 uint64_t x = value ^ get_execution_seed();
328 x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
329 x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
330 return x ^ (x >> 31);
331}
332
333} // namespace detail
334} // namespace hashing
335
336// Declared and documented above, but defined here so that any of the hashing
337// infrastructure is available.
338template <typename T>
339std::enable_if_t<is_integral_or_enum<T>::value, hash_code> hash_value(T value) {
340 return ::llvm::hashing::detail::hash_integer_value(
341 static_cast<uint64_t>(value));
342}
343
344// Declared and documented above, but defined here so that any of the hashing
345// infrastructure is available.
346template <typename T> hash_code hash_value(const T *ptr) {
347 return ::llvm::hashing::detail::hash_integer_value(
348 reinterpret_cast<uintptr_t>(ptr));
349}
350
351// Declared and documented above, but defined here so that any of the hashing
352// infrastructure is available.
353template <typename T, typename U>
354hash_code hash_value(const std::pair<T, U> &arg) {
355 return hash_combine(arg.first, arg.second);
356}
357
358template <typename... Ts> hash_code hash_value(const std::tuple<Ts...> &arg) {
359 return std::apply([](const auto &...xs) { return hash_combine(xs...); }, arg);
360}
361
362// Declared and documented above, but defined here so that any of the hashing
363// infrastructure is available.
364template <typename T>
365hash_code hash_value(const std::basic_string<T> &arg) {
366 return hash_combine_range(arg);
367}
368
369template <typename T> hash_code hash_value(const std::optional<T> &arg) {
370 return arg ? hash_combine(true, *arg) : hash_value(false);
371}
372
373template <> struct DenseMapInfo<hash_code, void> {
374 static constexpr unsigned getHashValue(hash_code val) {
375 return static_cast<unsigned>(size_t(val));
376 }
377 static constexpr bool isEqual(hash_code LHS, hash_code RHS) {
378 return LHS == RHS;
379 }
380};
381
382} // namespace llvm
383
384/// Implement std::hash so that hash_code can be used in STL containers.
385namespace std {
386
387template<>
388struct hash<llvm::hash_code> {
389 constexpr size_t operator()(llvm::hash_code const &Val) const { return Val; }
390};
391
392} // namespace std;
393
394#endif
#define T
nvptx lower args
static Split data
Value * RHS
Value * LHS
An opaque object representing a hash code.
Definition Hashing.h:77
constexpr hash_code(size_t value)
Form a hash code directly from a numerical value.
Definition Hashing.h:86
friend constexpr size_t hash_value(const hash_code &code)
Allow a hash_code to be directly run through hash_value.
Definition Hashing.h:99
hash_code()=default
Default construct a hash_code.
friend constexpr bool operator==(const hash_code &lhs, const hash_code &rhs)
Definition Hashing.h:91
friend constexpr bool operator!=(const hash_code &lhs, const hash_code &rhs)
Definition Hashing.h:94
hash_code combine_bytes(const char *data, size_t len)
Hash a contiguous byte buffer to a hash_code.
Definition Hashing.h:160
constexpr size_t total_hashable_size()
Sum of sizeof(get_hashable_data(arg)) across a parameter pack.
Definition Hashing.h:261
hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last)
Implement the combining of integral values into a hash_code.
Definition Hashing.h:218
hash_code hash_integer_value(uint64_t value)
Helper to hash the value of a single integer.
Definition Hashing.h:325
auto get_hashable_data(const T &value)
Helper to get the hashable data representation for a type.
Definition Hashing.h:194
uint64_t get_execution_seed()
In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic per process (address of a fu...
Definition Hashing.h:144
void store_hashable_data(char *buf, size_t &off, const T &arg)
Copy get_hashable_data(arg) into buf at offset off, advancing off.
Definition Hashing.h:268
This is an optimization pass for GlobalISel generic memory operations.
hash_code hash_value(const FixedPointSemantics &Val)
constexpr auto adl_begin(RangeT &&range) -> decltype(adl_detail::begin_impl(std::forward< RangeT >(range)))
Returns the begin iterator to range using std::begin and function found through Argument-Dependent Lo...
Definition ADL.h:78
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Inline ArrayRef overloads of the xxhash entry points declared out-of-line in llvm/Support/xxhash....
Definition ArrayRef.h:558
LLVM_ABI void install_fatal_error_handler(fatal_error_handler_t handler, void *user_data=nullptr)
install_fatal_error_handler - Installs a new error handler to be used whenever a serious (non-recover...
constexpr auto adl_end(RangeT &&range) -> decltype(adl_detail::end_impl(std::forward< RangeT >(range)))
Returns the end iterator to range using std::end and functions found through Argument-Dependent Looku...
Definition ADL.h:86
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
static constexpr bool isEqual(hash_code LHS, hash_code RHS)
Definition Hashing.h:377
static constexpr unsigned getHashValue(hash_code val)
Definition Hashing.h:374
An information struct used to provide DenseMap with the various necessary components for a given valu...
Trait to indicate whether a type's bits can be hashed directly.
Definition Hashing.h:180
constexpr size_t operator()(llvm::hash_code const &Val) const
Definition Hashing.h:389