LLVM 23.0.0git
xxhash.cpp
Go to the documentation of this file.
1/*
2 * xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2023, Yann Collet
4 *
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34
35// xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July
36// 2023).
37
38// xxh3_128bits is based on commit b0adcc54188c3130b1793e7b19c62eb1e669f7df
39// (June 2024).
40
41#include "llvm/Support/xxhash.h"
43#include "llvm/Support/Endian.h"
44
45#include <stdlib.h>
46
47#if !defined(LLVM_XXH_USE_NEON)
48#if (defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) && \
49 !defined(__ARM_BIG_ENDIAN)
50#define LLVM_XXH_USE_NEON 1
51#else
52#define LLVM_XXH_USE_NEON 0
53#endif
54#endif
55
56#if LLVM_XXH_USE_NEON
57#include <arm_neon.h>
58#endif
59
60using namespace llvm;
61using namespace support;
62
/// Rotate \p X left by \p R bits.
/// The shift counts are masked to 0-63 so the expression is well-defined for
/// R == 0 as well; the original `X >> (64 - R)` would shift by 64 in that
/// case, which is undefined behavior in C++. Callers currently pass only
/// nonzero counts (24 and 49), for which the result is unchanged.
static uint64_t rotl64(uint64_t X, size_t R) {
  return (X << (R & 63)) | (X >> ((64 - R) & 63));
}
66
67constexpr uint32_t PRIME32_1 = 0x9E3779B1;
68constexpr uint32_t PRIME32_2 = 0x85EBCA77;
69constexpr uint32_t PRIME32_3 = 0xC2B2AE3D;
70
71static const uint64_t PRIME64_1 = 11400714785074694791ULL;
72static const uint64_t PRIME64_2 = 14029467366897019727ULL;
73static const uint64_t PRIME64_3 = 1609587929392839161ULL;
74static const uint64_t PRIME64_4 = 9650029242287828579ULL;
75static const uint64_t PRIME64_5 = 2870177450012600261ULL;
76
78 hash ^= hash >> 33;
79 hash *= PRIME64_2;
80 hash ^= hash >> 29;
81 hash *= PRIME64_3;
82 hash ^= hash >> 32;
83 return hash;
84}
85
86constexpr size_t XXH3_SECRETSIZE_MIN = 136;
87constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192;
88
89/* Pseudorandom data taken directly from FARSH */
90// clang-format off
92 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
93 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
94 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
95 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
96 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
97 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
98 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
99 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
100 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
101 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
102 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
103 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
104};
105// clang-format on
106
107constexpr uint64_t PRIME_MX1 = 0x165667919E3779F9;
108constexpr uint64_t PRIME_MX2 = 0x9FB21C651E98DF25;
109
// Calculates a 64-bit to 128-bit multiply, then XOR folds it.
static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) {
#if defined(__SIZEOF_INT128__) ||                                              \
    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
  __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
  return (uint64_t)product ^ (uint64_t)(product >> 64);

#else
  /* First calculate all of the cross products. */
  const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
  const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF);
  const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
  const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);

  /* Now add the products together. These will never overflow. */
  const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
  const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);

  return upper ^ lower;
#endif
}
132
133constexpr size_t XXH_STRIPE_LEN = 64;
134constexpr size_t XXH_SECRET_CONSUME_RATE = 8;
135constexpr size_t XXH_ACC_NB = XXH_STRIPE_LEN / sizeof(uint64_t);
136
138 hash ^= hash >> 37;
139 hash *= PRIME_MX1;
140 hash ^= hash >> 32;
141 return hash;
142}
143
144static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len,
145 const uint8_t *secret, uint64_t seed) {
146 const uint8_t c1 = input[0];
147 const uint8_t c2 = input[len >> 1];
148 const uint8_t c3 = input[len - 1];
149 uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) |
150 ((uint32_t)c3 << 0) | ((uint32_t)len << 8);
151 uint64_t bitflip =
152 (uint64_t)(endian::read32le(secret) ^ endian::read32le(secret + 4)) +
153 seed;
154 return XXH64_avalanche(uint64_t(combined) ^ bitflip);
155}
156
157static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len,
158 const uint8_t *secret, uint64_t seed) {
159 seed ^= (uint64_t)byteswap(uint32_t(seed)) << 32;
160 const uint32_t input1 = endian::read32le(input);
161 const uint32_t input2 = endian::read32le(input + len - 4);
162 uint64_t acc =
163 (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed;
164 const uint64_t input64 = (uint64_t)input2 | ((uint64_t)input1 << 32);
165 acc ^= input64;
166 // XXH3_rrmxmx(acc, len)
167 acc ^= rotl64(acc, 49) ^ rotl64(acc, 24);
168 acc *= PRIME_MX2;
169 acc ^= (acc >> 35) + (uint64_t)len;
170 acc *= PRIME_MX2;
171 return acc ^ (acc >> 28);
172}
173
174static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len,
175 const uint8_t *secret, uint64_t const seed) {
176 uint64_t input_lo =
177 (endian::read64le(secret + 24) ^ endian::read64le(secret + 32)) + seed;
178 uint64_t input_hi =
179 (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed;
180 input_lo ^= endian::read64le(input);
181 input_hi ^= endian::read64le(input + len - 8);
182 uint64_t acc = uint64_t(len) + byteswap(input_lo) + input_hi +
183 XXH3_mul128_fold64(input_lo, input_hi);
184 return XXH3_avalanche(acc);
185}
186
188static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len,
189 const uint8_t *secret, uint64_t const seed) {
190 if (LLVM_LIKELY(len > 8))
191 return XXH3_len_9to16_64b(input, len, secret, seed);
192 if (LLVM_LIKELY(len >= 4))
193 return XXH3_len_4to8_64b(input, len, secret, seed);
194 if (len != 0)
195 return XXH3_len_1to3_64b(input, len, secret, seed);
196 return XXH64_avalanche(seed ^ endian::read64le(secret + 56) ^
197 endian::read64le(secret + 64));
198}
199
200static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret,
201 uint64_t seed) {
202 uint64_t lhs = seed;
203 uint64_t rhs = 0U - seed;
204 lhs += endian::read64le(secret);
205 rhs += endian::read64le(secret + 8);
206 lhs ^= endian::read64le(input);
207 rhs ^= endian::read64le(input + 8);
208 return XXH3_mul128_fold64(lhs, rhs);
209}
210
211/* For mid range keys, XXH3 uses a Mum-hash variant. */
213static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len,
214 const uint8_t *secret,
215 uint64_t const seed) {
216 uint64_t acc = len * PRIME64_1, acc_end;
217 acc += XXH3_mix16B(input + 0, secret + 0, seed);
218 acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed);
219 if (len > 32) {
220 acc += XXH3_mix16B(input + 16, secret + 32, seed);
221 acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed);
222 if (len > 64) {
223 acc += XXH3_mix16B(input + 32, secret + 64, seed);
224 acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed);
225 if (len > 96) {
226 acc += XXH3_mix16B(input + 48, secret + 96, seed);
227 acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed);
228 }
229 }
230 }
231 return XXH3_avalanche(acc + acc_end);
232}
233
234constexpr size_t XXH3_MIDSIZE_MAX = 240;
235constexpr size_t XXH3_MIDSIZE_STARTOFFSET = 3;
236constexpr size_t XXH3_MIDSIZE_LASTOFFSET = 17;
237
239static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len,
240 const uint8_t *secret, uint64_t seed) {
241 uint64_t acc = (uint64_t)len * PRIME64_1;
242 const unsigned nbRounds = len / 16;
243 for (unsigned i = 0; i < 8; ++i)
244 acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed);
245 acc = XXH3_avalanche(acc);
246
247 for (unsigned i = 8; i < nbRounds; ++i) {
248 acc += XXH3_mix16B(input + 16 * i,
249 secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed);
250 }
251 /* last bytes */
252 acc +=
253 XXH3_mix16B(input + len - 16,
255 return XXH3_avalanche(acc);
256}
257
258#if LLVM_XXH_USE_NEON
259
260#define XXH3_accumulate_512 XXH3_accumulate_512_neon
261#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
262
263// NEON implementation based on commit a57f6cce2698049863af8c25787084ae0489d849
264// (July 2024), with the following removed:
265// - workaround for suboptimal codegen on older GCC
266// - compiler barriers against instruction reordering
267// - WebAssembly SIMD support
268// - configurable split between NEON and scalar lanes (benchmarking shows no
269// penalty when fully doing SIMD on the Apple M1)
270
271#if defined(__GNUC__) || defined(__clang__)
272#define XXH_ALIASING __attribute__((__may_alias__))
273#else
274#define XXH_ALIASING /* nothing */
275#endif
276
277typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
278
279LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64x2_t XXH_vld1q_u64(void const *ptr) {
280 return vreinterpretq_u64_u8(vld1q_u8((uint8_t const *)ptr));
281}
282
284static void XXH3_accumulate_512_neon(uint64_t *acc, const uint8_t *input,
285 const uint8_t *secret) {
286 xxh_aliasing_uint64x2_t *const xacc = (xxh_aliasing_uint64x2_t *)acc;
287
288#ifdef __clang__
289#pragma clang loop unroll(full)
290#endif
291 for (size_t i = 0; i < XXH_ACC_NB / 2; i += 2) {
292 /* data_vec = input[i]; */
293 uint64x2_t data_vec_1 = XXH_vld1q_u64(input + (i * 16));
294 uint64x2_t data_vec_2 = XXH_vld1q_u64(input + ((i + 1) * 16));
295
296 /* key_vec = secret[i]; */
297 uint64x2_t key_vec_1 = XXH_vld1q_u64(secret + (i * 16));
298 uint64x2_t key_vec_2 = XXH_vld1q_u64(secret + ((i + 1) * 16));
299
300 /* data_swap = swap(data_vec) */
301 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
302 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
303
304 /* data_key = data_vec ^ key_vec; */
305 uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
306 uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
307
308 /*
309 * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
310 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
311 * get one vector with the low 32 bits of each lane, and one vector
312 * with the high 32 bits of each lane.
313 *
314 * The intrinsic returns a double vector because the original ARMv7-a
315 * instruction modified both arguments in place. AArch64 and SIMD128 emit
316 * two instructions from this intrinsic.
317 *
318 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
319 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
320 */
321 uint32x4x2_t unzipped = vuzpq_u32(vreinterpretq_u32_u64(data_key_1),
322 vreinterpretq_u32_u64(data_key_2));
323
324 /* data_key_lo = data_key & 0xFFFFFFFF */
325 uint32x4_t data_key_lo = unzipped.val[0];
326 /* data_key_hi = data_key >> 32 */
327 uint32x4_t data_key_hi = unzipped.val[1];
328
329 /*
330 * Then, we can split the vectors horizontally and multiply which, as for
331 * most widening intrinsics, have a variant that works on both high half
332 * vectors for free on AArch64. A similar instruction is available on
333 * SIMD128.
334 *
335 * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
336 */
337 uint64x2_t sum_1 = vmlal_u32(data_swap_1, vget_low_u32(data_key_lo),
338 vget_low_u32(data_key_hi));
339 uint64x2_t sum_2 = vmlal_u32(data_swap_2, vget_high_u32(data_key_lo),
340 vget_high_u32(data_key_hi));
341
342 /* xacc[i] = acc_vec + sum; */
343 xacc[i] = vaddq_u64(xacc[i], sum_1);
344 xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2);
345 }
346}
347
349static void XXH3_scrambleAcc_neon(uint64_t *acc, const uint8_t *secret) {
350 xxh_aliasing_uint64x2_t *const xacc = (xxh_aliasing_uint64x2_t *)acc;
351
352 /* { prime32_1, prime32_1 } */
353 uint32x2_t const kPrimeLo = vdup_n_u32(PRIME32_1);
354 /* { 0, prime32_1, 0, prime32_1 } */
355 uint32x4_t const kPrimeHi =
356 vreinterpretq_u32_u64(vdupq_n_u64((uint64_t)PRIME32_1 << 32));
357
358 for (size_t i = 0; i < XXH_ACC_NB / 2; ++i) {
359 /* xacc[i] ^= (xacc[i] >> 47); */
360 uint64x2_t acc_vec = XXH_vld1q_u64(acc + (2 * i));
361 uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
362 uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
363
364 /* xacc[i] ^= secret[i]; */
365 uint64x2_t key_vec = XXH_vld1q_u64(secret + (i * 16));
366 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
367
368 /*
369 * xacc[i] *= XXH_PRIME32_1
370 *
371 * Expanded version with portable NEON intrinsics
372 *
373 * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
374 *
375 * prod_hi = hi(data_key) * lo(prime) << 32
376 *
377 * Since we only need 32 bits of this multiply a trick can be used,
378 * reinterpreting the vector as a uint32x4_t and multiplying by
379 * { 0, prime, 0, prime } to cancel out the unwanted bits and avoid the
380 * shift.
381 */
382 uint32x4_t prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), kPrimeHi);
383
384 /* Extract low bits for vmlal_u32 */
385 uint32x2_t data_key_lo = vmovn_u64(data_key);
386
387 /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
388 xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
389 }
390}
391#else
392
393#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
394#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
395
397static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input,
398 const uint8_t *secret) {
399 for (size_t i = 0; i < XXH_ACC_NB; ++i) {
400 uint64_t data_val = endian::read64le(input + 8 * i);
401 uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i);
402 acc[i ^ 1] += data_val;
403 acc[i] += uint32_t(data_key) * (data_key >> 32);
404 }
405}
406
408static void XXH3_scrambleAcc_scalar(uint64_t *acc, const uint8_t *secret) {
409 for (size_t i = 0; i < XXH_ACC_NB; ++i) {
410 acc[i] ^= acc[i] >> 47;
411 acc[i] ^= endian::read64le(secret + 8 * i);
412 acc[i] *= PRIME32_1;
413 }
414}
415#endif
416
418static void XXH3_accumulate(uint64_t *acc, const uint8_t *input,
419 const uint8_t *secret, size_t nbStripes) {
420 for (size_t n = 0; n < nbStripes; ++n) {
421 XXH3_accumulate_512(acc, input + n * XXH_STRIPE_LEN,
422 secret + n * XXH_SECRET_CONSUME_RATE);
423 }
424}
425
426static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret) {
427 return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret),
428 acc[1] ^ endian::read64le(secret + 8));
429}
430
431static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key,
432 uint64_t start) {
433 uint64_t result64 = start;
434 for (size_t i = 0; i < 4; ++i)
435 result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i);
436 return XXH3_avalanche(result64);
437}
438
440static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len,
441 const uint8_t *secret, size_t secretSize) {
442 const size_t nbStripesPerBlock =
444 const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
445 const size_t nb_blocks = (len - 1) / block_len;
446 alignas(16) uint64_t acc[XXH_ACC_NB] = {
449 };
450 for (size_t n = 0; n < nb_blocks; ++n) {
451 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
452 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
453 }
454
455 /* last partial block */
456 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
457 assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE);
458 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
459
460 /* last stripe */
461 constexpr size_t XXH_SECRET_LASTACC_START = 7;
462 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN,
463 secret + secretSize - XXH_STRIPE_LEN -
464 XXH_SECRET_LASTACC_START);
465
466 /* converge into final hash */
467 constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
468 return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
469 (uint64_t)len * PRIME64_1);
470}
471
472uint64_t llvm::xxh3_64bits(const uint8_t *in, size_t len) {
473 if (len <= 16)
474 return XXH3_len_0to16_64b(in, len, kSecret, 0);
475 if (len <= 128)
476 return XXH3_len_17to128_64b(in, len, kSecret, 0);
477 if (len <= XXH3_MIDSIZE_MAX)
478 return XXH3_len_129to240_64b(in, len, kSecret, 0);
479 return XXH3_hashLong_64b(in, len, kSecret, sizeof(kSecret));
480}
481
482/* ==========================================
483 * XXH3 128 bits (a.k.a XXH128)
484 * ==========================================
485 * XXH3's 128-bit variant has better mixing and strength than the 64-bit
486 * variant, even without counting the significantly larger output size.
487 *
488 * For example, extra steps are taken to avoid the seed-dependent collisions
489 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
490 *
491 * This strength naturally comes at the cost of some speed, especially on short
492 * lengths. Note that longer hashes are about as fast as the 64-bit version
493 * due to it using only a slight modification of the 64-bit loop.
494 *
495 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
496 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
497 */
498
499/*!
500 * @internal
501 * @def XXH_rotl32(x,r)
502 * @brief 32-bit rotate left.
503 *
504 * @param x The 32-bit integer to be rotated.
505 * @param r The number of bits to rotate.
506 * @pre
507 * @p r > 0 && @p r < 32
508 * @note
509 * @p x and @p r may be evaluated multiple times.
510 * @return The rotated result.
511 */
#if __has_builtin(__builtin_rotateleft32) &&                                   \
    __has_builtin(__builtin_rotateleft64)
#define XXH_rotl32 __builtin_rotateleft32
#define XXH_rotl64 __builtin_rotateleft64
/* Note: although _rotl exists for minGW (GCC under windows), performance seems
 * poor */
#elif defined(_MSC_VER)
#define XXH_rotl32(x, r) _rotl(x, r)
#define XXH_rotl64(x, r) _rotl64(x, r)
#else
#define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif

/* Truncate both operands to 32 bits, then multiply into 64 bits. */
#define XXH_mult32to64(x, y) ((uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y))
527
528/*!
529 * @brief Calculates a 64->128-bit long multiply.
530 *
531 * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
532 * version.
533 *
534 * @param lhs , rhs The 64-bit integers to be multiplied
535 * @return The 128-bit result represented in an @ref XXH128_hash_t.
536 */
538 /*
539 * GCC/Clang __uint128_t method.
540 *
541 * On most 64-bit targets, GCC and Clang define a __uint128_t type.
542 * This is usually the best way as it usually uses a native long 64-bit
543 * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
544 *
545 * Usually.
546 *
547 * Despite being a 32-bit platform, Clang (and emscripten) define this type
548 * despite not having the arithmetic for it. This results in a laggy
549 * compiler builtin call which calculates a full 128-bit multiply.
550 * In that case it is best to use the portable one.
551 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
552 */
553#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) && \
554 defined(__SIZEOF_INT128__) || \
555 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
556
557 __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
558 XXH128_hash_t r128;
559 r128.low64 = (uint64_t)(product);
560 r128.high64 = (uint64_t)(product >> 64);
561 return r128;
562
563 /*
564 * MSVC for x64's _umul128 method.
565 *
566 * uint64_t _umul128(uint64_t Multiplier, uint64_t Multiplicand, uint64_t
567 * *HighProduct);
568 *
569 * This compiles to single operand MUL on x64.
570 */
571#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
572
573#ifndef _MSC_VER
574#pragma intrinsic(_umul128)
575#endif
576 uint64_t product_high;
577 uint64_t const product_low = _umul128(lhs, rhs, &product_high);
578 XXH128_hash_t r128;
579 r128.low64 = product_low;
580 r128.high64 = product_high;
581 return r128;
582
583 /*
584 * MSVC for ARM64's __umulh method.
585 *
586 * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
587 */
588#elif defined(_M_ARM64) || defined(_M_ARM64EC)
589
590#ifndef _MSC_VER
591#pragma intrinsic(__umulh)
592#endif
593 XXH128_hash_t r128;
594 r128.low64 = lhs * rhs;
595 r128.high64 = __umulh(lhs, rhs);
596 return r128;
597
598#else
599 /*
600 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
601 *
602 * This is a fast and simple grade school multiply, which is shown below
603 * with base 10 arithmetic instead of base 0x100000000.
604 *
605 * 9 3 // D2 lhs = 93
606 * x 7 5 // D2 rhs = 75
607 * ----------
608 * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
609 * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
610 * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
611 * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
612 * ---------
613 * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
614 * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
615 * ---------
616 * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
617 *
618 * The reasons for adding the products like this are:
619 * 1. It avoids manual carry tracking. Just like how
620 * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
621 * This avoids a lot of complexity.
622 *
623 * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
624 * instruction available in ARM's Digital Signal Processing extension
625 * in 32-bit ARMv6 and later, which is shown below:
626 *
627 * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
628 * {
629 * uint64_t product = (uint64_t)*RdLo * (uint64_t)*RdHi + Rn + Rm;
630 * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
631 * *RdHi = (xxh_u32)(product >> 32);
632 * }
633 *
634 * This instruction was designed for efficient long multiplication, and
635 * allows this to be calculated in only 4 instructions at speeds
636 * comparable to some 64-bit ALUs.
637 *
638 * 3. It isn't terrible on other platforms. Usually this will be a couple
639 * of 32-bit ADD/ADCs.
640 */
641
642 /* First calculate all of the cross products. */
643 uint64_t const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
644 uint64_t const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
645 uint64_t const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
646 uint64_t const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
647
648 /* Now add the products together. These will never overflow. */
649 uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
650 uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
651 uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
652
653 XXH128_hash_t r128;
654 r128.low64 = lower;
655 r128.high64 = upper;
656 return r128;
657#endif
658}
659
/*! Seems to produce slightly better code on GCC for some reason.
 * Precondition (from callers): 0 < shift < 64. */
static inline uint64_t XXH_xorshift64(uint64_t v64, int shift) {
  return v64 ^ (v64 >> shift);
}
665
667XXH3_len_1to3_128b(const uint8_t *input, size_t len, const uint8_t *secret,
668 uint64_t seed) {
669 /* A doubled version of 1to3_64b with different constants. */
670 /*
671 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
672 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
673 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
674 */
675 uint8_t const c1 = input[0];
676 uint8_t const c2 = input[len >> 1];
677 uint8_t const c3 = input[len - 1];
678 uint32_t const combinedl = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) |
679 ((uint32_t)c3 << 0) | ((uint32_t)len << 8);
680 uint32_t const combinedh = XXH_rotl32(byteswap(combinedl), 13);
681 uint64_t const bitflipl =
682 (endian::read32le(secret) ^ endian::read32le(secret + 4)) + seed;
683 uint64_t const bitfliph =
684 (endian::read32le(secret + 8) ^ endian::read32le(secret + 12)) - seed;
685 uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl;
686 uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph;
687 XXH128_hash_t h128;
688 h128.low64 = XXH64_avalanche(keyed_lo);
689 h128.high64 = XXH64_avalanche(keyed_hi);
690 return h128;
691}
692
694XXH3_len_4to8_128b(const uint8_t *input, size_t len, const uint8_t *secret,
695 uint64_t seed) {
696 seed ^= (uint64_t)byteswap((uint32_t)seed) << 32;
697 uint32_t const input_lo = endian::read32le(input);
698 uint32_t const input_hi = endian::read32le(input + len - 4);
699 uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32);
700 uint64_t const bitflip =
701 (endian::read64le(secret + 16) ^ endian::read64le(secret + 24)) + seed;
702 uint64_t const keyed = input_64 ^ bitflip;
703
704 /* Shift len to the left to ensure it is even, this avoids even multiplies.
705 */
706 XXH128_hash_t m128 = XXH_mult64to128(keyed, PRIME64_1 + (len << 2));
707
708 m128.high64 += (m128.low64 << 1);
709 m128.low64 ^= (m128.high64 >> 3);
710
711 m128.low64 = XXH_xorshift64(m128.low64, 35);
712 m128.low64 *= PRIME_MX2;
713 m128.low64 = XXH_xorshift64(m128.low64, 28);
714 m128.high64 = XXH3_avalanche(m128.high64);
715 return m128;
716}
717
719XXH3_len_9to16_128b(const uint8_t *input, size_t len, const uint8_t *secret,
720 uint64_t seed) {
721 uint64_t const bitflipl =
722 (endian::read64le(secret + 32) ^ endian::read64le(secret + 40)) - seed;
723 uint64_t const bitfliph =
724 (endian::read64le(secret + 48) ^ endian::read64le(secret + 56)) + seed;
725 uint64_t const input_lo = endian::read64le(input);
726 uint64_t input_hi = endian::read64le(input + len - 8);
727 XXH128_hash_t m128 =
728 XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, PRIME64_1);
729 /*
730 * Put len in the middle of m128 to ensure that the length gets mixed to
731 * both the low and high bits in the 128x64 multiply below.
732 */
733 m128.low64 += (uint64_t)(len - 1) << 54;
734 input_hi ^= bitfliph;
735 /*
736 * Add the high 32 bits of input_hi to the high 32 bits of m128, then
737 * add the long product of the low 32 bits of input_hi and PRIME32_2 to
738 * the high 64 bits of m128.
739 *
740 * The best approach to this operation is different on 32-bit and 64-bit.
741 */
742 if (sizeof(void *) < sizeof(uint64_t)) { /* 32-bit */
743 /*
744 * 32-bit optimized version, which is more readable.
745 *
746 * On 32-bit, it removes an ADC and delays a dependency between the two
747 * halves of m128.high64, but it generates an extra mask on 64-bit.
748 */
749 m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) +
751 } else {
752 /*
753 * 64-bit optimized (albeit more confusing) version.
754 *
755 * Uses some properties of addition and multiplication to remove the mask:
756 *
757 * Let:
758 * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
759 * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
760 * c = PRIME32_2
761 *
762 * a + (b * c)
763 * Inverse Property: x + y - x == y
764 * a + (b * (1 + c - 1))
765 * Distributive Property: x * (y + z) == (x * y) + (x * z)
766 * a + (b * 1) + (b * (c - 1))
767 * Identity Property: x * 1 == x
768 * a + b + (b * (c - 1))
769 *
770 * Substitute a, b, and c:
771 * input_hi.hi + input_hi.lo + ((uint64_t)input_hi.lo * (PRIME32_2
772 * - 1))
773 *
774 * Since input_hi.hi + input_hi.lo == input_hi, we get this:
775 * input_hi + ((uint64_t)input_hi.lo * (PRIME32_2 - 1))
776 */
777 m128.high64 += input_hi + XXH_mult32to64((uint32_t)input_hi, PRIME32_2 - 1);
778 }
779 /* m128 ^= XXH_swap64(m128 >> 64); */
780 m128.low64 ^= byteswap(m128.high64);
781
782 /* 128x64 multiply: h128 = m128 * PRIME64_2; */
784 h128.high64 += m128.high64 * PRIME64_2;
785
786 h128.low64 = XXH3_avalanche(h128.low64);
787 h128.high64 = XXH3_avalanche(h128.high64);
788 return h128;
789}
790
791/*
792 * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
793 */
795XXH3_len_0to16_128b(const uint8_t *input, size_t len, const uint8_t *secret,
796 uint64_t seed) {
797 if (len > 8)
798 return XXH3_len_9to16_128b(input, len, secret, seed);
799 if (len >= 4)
800 return XXH3_len_4to8_128b(input, len, secret, seed);
801 if (len)
802 return XXH3_len_1to3_128b(input, len, secret, seed);
803 XXH128_hash_t h128;
804 uint64_t const bitflipl =
805 endian::read64le(secret + 64) ^ endian::read64le(secret + 72);
806 uint64_t const bitfliph =
807 endian::read64le(secret + 80) ^ endian::read64le(secret + 88);
808 h128.low64 = XXH64_avalanche(seed ^ bitflipl);
809 h128.high64 = XXH64_avalanche(seed ^ bitfliph);
810 return h128;
811}
812
813/*
814 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
815 */
817XXH128_mix32B(XXH128_hash_t acc, const uint8_t *input_1, const uint8_t *input_2,
818 const uint8_t *secret, uint64_t seed) {
819 acc.low64 += XXH3_mix16B(input_1, secret + 0, seed);
820 acc.low64 ^= endian::read64le(input_2) + endian::read64le(input_2 + 8);
821 acc.high64 += XXH3_mix16B(input_2, secret + 16, seed);
822 acc.high64 ^= endian::read64le(input_1) + endian::read64le(input_1 + 8);
823 return acc;
824}
825
827XXH3_len_17to128_128b(const uint8_t *input, size_t len, const uint8_t *secret,
828 size_t secretSize, uint64_t seed) {
829 (void)secretSize;
830
831 XXH128_hash_t acc;
832 acc.low64 = len * PRIME64_1;
833 acc.high64 = 0;
834
835 if (len > 32) {
836 if (len > 64) {
837 if (len > 96) {
838 acc =
839 XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed);
840 }
841 acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed);
842 }
843 acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed);
844 }
845 acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed);
846 XXH128_hash_t h128;
847 h128.low64 = acc.low64 + acc.high64;
848 h128.high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) +
849 ((len - seed) * PRIME64_2);
850 h128.low64 = XXH3_avalanche(h128.low64);
851 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64);
852 return h128;
853}
854
856XXH3_len_129to240_128b(const uint8_t *input, size_t len, const uint8_t *secret,
857 size_t secretSize, uint64_t seed) {
858 (void)secretSize;
859
860 XXH128_hash_t acc;
861 unsigned i;
862 acc.low64 = len * PRIME64_1;
863 acc.high64 = 0;
864 /*
865 * We set as `i` as offset + 32. We do this so that unchanged
866 * `len` can be used as upper bound. This reaches a sweet spot
867 * where both x86 and aarch64 get simple agen and good codegen
868 * for the loop.
869 */
870 for (i = 32; i < 160; i += 32) {
871 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32,
872 seed);
873 }
874 acc.low64 = XXH3_avalanche(acc.low64);
875 acc.high64 = XXH3_avalanche(acc.high64);
876 /*
877 * NB: `i <= len` will duplicate the last 32-bytes if
878 * len % 32 was zero. This is an unfortunate necessity to keep
879 * the hash result stable.
880 */
881 for (i = 160; i <= len; i += 32) {
882 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16,
883 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed);
884 }
885 /* last bytes */
886 acc =
887 XXH128_mix32B(acc, input + len - 16, input + len - 32,
889 (uint64_t)0 - seed);
890
891 XXH128_hash_t h128;
892 h128.low64 = acc.low64 + acc.high64;
893 h128.high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) +
894 ((len - seed) * PRIME64_2);
895 h128.low64 = XXH3_avalanche(h128.low64);
896 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64);
897 return h128;
898}
899
901XXH3_hashLong_128b(const uint8_t *input, size_t len, const uint8_t *secret,
902 size_t secretSize) {
903 const size_t nbStripesPerBlock =
905 const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
906 const size_t nb_blocks = (len - 1) / block_len;
907 alignas(16) uint64_t acc[XXH_ACC_NB] = {
910 };
911
912 for (size_t n = 0; n < nb_blocks; ++n) {
913 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
914 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
915 }
916
917 /* last partial block */
918 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
919 assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE);
920 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
921
922 /* last stripe */
923 constexpr size_t XXH_SECRET_LASTACC_START = 7;
924 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN,
925 secret + secretSize - XXH_STRIPE_LEN -
926 XXH_SECRET_LASTACC_START);
927
928 /* converge into final hash */
929 static_assert(sizeof(acc) == 64);
930 XXH128_hash_t h128;
931 constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
932 h128.low64 = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
933 (uint64_t)len * PRIME64_1);
934 h128.high64 = XXH3_mergeAccs(
935 acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
936 ~((uint64_t)len * PRIME64_2));
937 return h128;
938}
939
941 /*
942 * If an action is to be taken if `secret` conditions are not respected,
943 * it should be done here.
944 * For now, it's a contract pre-condition.
945 * Adding a check and a branch here would cost performance at every hash.
946 */
947 if (len <= 16)
948 return XXH3_len_0to16_128b(input, len, kSecret, /*seed64=*/0);
949 if (len <= 128)
950 return XXH3_len_17to128_128b(input, len, kSecret, sizeof(kSecret),
951 /*seed64=*/0);
952 if (len <= XXH3_MIDSIZE_MAX)
953 return XXH3_len_129to240_128b(input, len, kSecret, sizeof(kSecret),
954 /*seed64=*/0);
955 return XXH3_hashLong_128b(input, len, kSecret, sizeof(kSecret));
956}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
#define LLVM_ATTRIBUTE_ALWAYS_INLINE
LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do so, mark a method "always...
Definition Compiler.h:356
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
Definition Compiler.h:346
#define LLVM_LIKELY(EXPR)
Definition Compiler.h:335
uint64_t read64le(const void *P)
Definition Endian.h:435
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Inline ArrayRef overloads of the xxhash entry points declared out-of-line in llvm/Support/xxhash....
Definition ArrayRef.h:558
constexpr T byteswap(T V) noexcept
Reverses the bytes in the given integer value V.
Definition bit.h:102
XXH128_hash_t xxh3_128bits(ArrayRef< uint8_t > data)
Definition ArrayRef.h:561
The return value from 128-bit hashes.
Definition xxhash.h:58
uint64_t low64
value & 0xFFFFFFFFFFFFFFFF
Definition xxhash.h:59
uint64_t high64
value >> 64
Definition xxhash.h:60
static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
Definition xxhash.cpp:174
#define XXH_mult32to64(x, y)
Definition xxhash.cpp:526
static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:157
static LLVM_ATTRIBUTE_NOINLINE uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize)
Definition xxhash.cpp:440
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_accumulate(uint64_t *acc, const uint8_t *input, const uint8_t *secret, size_t nbStripes)
Definition xxhash.cpp:418
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_1to3_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:667
static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:144
static const uint64_t PRIME64_3
Definition xxhash.cpp:73
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH128_mix32B(XXH128_hash_t acc, const uint8_t *input_1, const uint8_t *input_2, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:817
static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key, uint64_t start)
Definition xxhash.cpp:431
static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret, uint64_t seed)
Definition xxhash.cpp:200
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_17to128_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize, uint64_t seed)
Definition xxhash.cpp:827
static const uint64_t PRIME64_1
Definition xxhash.cpp:71
static XXH128_hash_t XXH_mult64to128(uint64_t lhs, uint64_t rhs)
Calculates a 64->128-bit long multiply.
Definition xxhash.cpp:537
static uint64_t XXH3_avalanche(uint64_t hash)
Definition xxhash.cpp:137
#define XXH3_accumulate_512
Definition xxhash.cpp:393
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_4to8_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:694
constexpr size_t XXH3_SECRETSIZE_MIN
Definition xxhash.cpp:86
static LLVM_ATTRIBUTE_NOINLINE XXH128_hash_t XXH3_len_129to240_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize, uint64_t seed)
Definition xxhash.cpp:856
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_9to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:719
constexpr uint32_t PRIME32_1
Definition xxhash.cpp:67
constexpr uint32_t PRIME32_2
Definition xxhash.cpp:68
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_0to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:795
#define XXH_rotl32(x, r)
Definition xxhash.cpp:522
static const uint64_t PRIME64_2
Definition xxhash.cpp:72
constexpr size_t XXH3_MIDSIZE_MAX
Definition xxhash.cpp:234
constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE]
Definition xxhash.cpp:91
static LLVM_ATTRIBUTE_ALWAYS_INLINE uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
Definition xxhash.cpp:213
static LLVM_ATTRIBUTE_NOINLINE uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
Definition xxhash.cpp:239
constexpr uint32_t PRIME32_3
Definition xxhash.cpp:69
constexpr size_t XXH_STRIPE_LEN
Definition xxhash.cpp:133
static const uint64_t PRIME64_4
Definition xxhash.cpp:74
#define XXH3_scrambleAcc
Definition xxhash.cpp:394
constexpr size_t XXH_ACC_NB
Definition xxhash.cpp:135
static const uint64_t PRIME64_5
Definition xxhash.cpp:75
LLVM_ATTRIBUTE_ALWAYS_INLINE constexpr uint64_t XXH_xorshift64(uint64_t v64, int shift)
Seems to produce slightly better code on GCC for some reason.
Definition xxhash.cpp:661
LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_hashLong_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize)
Definition xxhash.cpp:901
static uint64_t rotl64(uint64_t X, size_t R)
Definition xxhash.cpp:63
static uint64_t XXH64_avalanche(uint64_t hash)
Definition xxhash.cpp:77
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_scrambleAcc_scalar(uint64_t *acc, const uint8_t *secret)
Definition xxhash.cpp:408
constexpr size_t XXH_SECRET_DEFAULT_SIZE
Definition xxhash.cpp:87
constexpr uint64_t PRIME_MX1
Definition xxhash.cpp:107
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input, const uint8_t *secret)
Definition xxhash.cpp:397
static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret)
Definition xxhash.cpp:426
constexpr size_t XXH3_MIDSIZE_STARTOFFSET
Definition xxhash.cpp:235
static LLVM_ATTRIBUTE_ALWAYS_INLINE uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
Definition xxhash.cpp:188
static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs)
Definition xxhash.cpp:111
constexpr size_t XXH_SECRET_CONSUME_RATE
Definition xxhash.cpp:134
constexpr uint64_t PRIME_MX2
Definition xxhash.cpp:108
constexpr size_t XXH3_MIDSIZE_LASTOFFSET
Definition xxhash.cpp:236