#if !defined(LLVM_XXH_USE_NEON)
#if (defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) &&     \
    !defined(__ARM_BIG_ENDIAN)
#define LLVM_XXH_USE_NEON 1
#else
#define LLVM_XXH_USE_NEON 0
#endif
#endif
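// Note: the NEON code path below is only selected for little-endian AArch64
// targets (including MSVC's ARM64 and ARM64EC); all other targets fall back to
// the scalar accumulate/scramble routines further down.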
static uint64_t rotl64(uint64_t X, size_t R) {
  return (X << R) | (X >> (64 - R));
}
constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE] = {
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
};
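// These 192 bytes are the default XXH3 secret (the same pseudorandom constants
// used by upstream xxHash); 16- and 64-byte slices of it are XORed into the
// input at each mixing step below.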
static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) {
#if defined(__SIZEOF_INT128__) ||                                              \
    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
  __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
  return uint64_t(product) ^ uint64_t(product >> 64);

#else
  /* First calculate all of the cross products. */
  const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
  const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF);
  const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
  const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);

  /* Now add the products together. These will never overflow. */
  const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
  const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);

  return upper ^ lower;
#endif
}
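// Both branches above compute the full 128-bit product of lhs and rhs and then
// fold the high and low 64-bit halves together with XOR; this folded multiply
// is the core mixing primitive used by XXH3_mix16B.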
  const uint8_t c2 = input[len >> 1];
  const uint8_t c3 = input[len - 1];
  return acc ^ (acc >> 28);
  acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed);
  acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed);
  acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed);
  acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed);
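// In XXH3_len_17to128_64b these acc_end updates walk backwards from the end of
// the input while matching (elided here) acc updates walk forwards from the
// start, each pair consuming a fresh 32-byte slice of the secret; the later
// lines only run behind increasing length guards (len > 32 / 64 / 96).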
  const unsigned nbRounds = len / 16;
  for (unsigned i = 0; i < 8; ++i)
    acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed);
  acc = XXH3_avalanche(acc);
  for (unsigned i = 8; i < nbRounds; ++i) {
    acc += XXH3_mix16B(input + 16 * i,
                       secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed);
  }
#define XXH3_accumulate_512 XXH3_accumulate_512_neon
#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
#if defined(__GNUC__) || defined(__clang__)
#define XXH_ALIASING __attribute__((__may_alias__))
#else
#define XXH_ALIASING
#endif
typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
static uint64x2_t XXH_vld1q_u64(void const *ptr) {
  return vreinterpretq_u64_u8(vld1q_u8((uint8_t const *)ptr));
}
static void XXH3_accumulate_512_neon(uint64_t *acc, const uint8_t *input,
                                     const uint8_t *secret) {
  xxh_aliasing_uint64x2_t *const xacc = (xxh_aliasing_uint64x2_t *)acc;

#ifdef __clang__
#pragma clang loop unroll(full)
#endif
  for (size_t i = 0; i < XXH_ACC_NB / 2; i += 2) {
    /* data_vec = input[i]; */
    uint64x2_t data_vec_1 = XXH_vld1q_u64(input + (i * 16));
    uint64x2_t data_vec_2 = XXH_vld1q_u64(input + ((i + 1) * 16));

    /* key_vec = secret[i]; */
    uint64x2_t key_vec_1 = XXH_vld1q_u64(secret + (i * 16));
    uint64x2_t key_vec_2 = XXH_vld1q_u64(secret + ((i + 1) * 16));

    /* data_swap = swap(data_vec) */
    uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
    uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);

    /* data_key = data_vec ^ key_vec */
    uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
    uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);

    /* De-interleave the 32-bit halves of both 64x2 vectors in one step. */
    uint32x4x2_t unzipped = vuzpq_u32(vreinterpretq_u32_u64(data_key_1),
                                      vreinterpretq_u32_u64(data_key_2));

    /* data_key_lo = data_key & 0xFFFFFFFF */
    uint32x4_t data_key_lo = unzipped.val[0];
    /* data_key_hi = data_key >> 32 */
    uint32x4_t data_key_hi = unzipped.val[1];

    /* sum = data_swap + (uint64x2_t)data_key_lo * (uint64x2_t)data_key_hi */
    uint64x2_t sum_1 = vmlal_u32(data_swap_1, vget_low_u32(data_key_lo),
                                 vget_low_u32(data_key_hi));
    uint64x2_t sum_2 = vmlal_u32(data_swap_2, vget_high_u32(data_key_lo),
                                 vget_high_u32(data_key_hi));

    /* xacc[i] = acc_vec + sum; */
    xacc[i] = vaddq_u64(xacc[i], sum_1);
    xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2);
  }
}
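// Per 64-byte stripe this performs, for each 64-bit accumulator lane,
//   acc[lane] += swapped(data) + (uint64_t)lo32(data ^ key) * hi32(data ^ key);
// the vuzpq_u32/vmlal_u32 sequence is just that 32x32->64 multiply-accumulate
// written with NEON intrinsics, matching XXH3_accumulate_512_scalar below.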
static void XXH3_scrambleAcc_neon(uint64_t *acc, const uint8_t *secret) {
  xxh_aliasing_uint64x2_t *const xacc = (xxh_aliasing_uint64x2_t *)acc;

  /* { prime32_1, prime32_1 } */
  uint32x2_t const kPrimeLo = vdup_n_u32(PRIME32_1);
  /* { 0, prime32_1, 0, prime32_1 } */
  uint32x4_t const kPrimeHi =
      vreinterpretq_u32_u64(vdupq_n_u64((uint64_t)PRIME32_1 << 32));

  for (size_t i = 0; i < XXH_ACC_NB / 2; ++i) {
    /* xacc[i] ^= (xacc[i] >> 47); */
    uint64x2_t acc_vec = XXH_vld1q_u64(acc + (2 * i));
    uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
    uint64x2_t data_vec = veorq_u64(acc_vec, shifted);

    /* xacc[i] ^= secret[i]; */
    uint64x2_t key_vec = XXH_vld1q_u64(secret + (i * 16));
    uint64x2_t data_key = veorq_u64(data_vec, key_vec);

    /* xacc[i] *= PRIME32_1, split into high and low 32-bit partial products. */
    uint32x4_t prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), kPrimeHi);

    /* Extract the low 32 bits of each lane for vmlal_u32. */
    uint32x2_t data_key_lo = vmovn_u64(data_key);

    /* xacc[i] = prod_hi + lo(data_key) * PRIME32_1; */
    xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
  }
}
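// The scramble step periodically "resets" the accumulator lanes: each lane is
// xorshifted right by 47, XORed with the secret, and multiplied by PRIME32_1,
// which is exactly what XXH3_scrambleAcc_scalar does one lane at a time.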
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
    acc[i ^ 1] += data_val; /* swap adjacent lanes */
    acc[i] += uint32_t(data_key) * (data_key >> 32);

    acc[i] ^= acc[i] >> 47;
static LLVM_ATTRIBUTE_ALWAYS_INLINE void
XXH3_accumulate(uint64_t *acc, const uint8_t *input, const uint8_t *secret,
                size_t nbStripes) {
  for (size_t n = 0; n < nbStripes; ++n) {
    XXH3_accumulate_512(acc, input + n * XXH_STRIPE_LEN,
                        secret + n * XXH_SECRET_CONSUME_RATE);
  }
}
  for (size_t i = 0; i < 4; ++i)
    result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i);
static LLVM_ATTRIBUTE_NOINLINE uint64_t
XXH3_hashLong_64b(const uint8_t *input, size_t len, const uint8_t *secret,
                  size_t secretSize) {
  const size_t nbStripesPerBlock =
      (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
  const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
  const size_t nb_blocks = (len - 1) / block_len;
  alignas(16) uint64_t acc[XXH_ACC_NB] = {PRIME32_3, PRIME64_1, PRIME64_2,
                                          PRIME64_3, PRIME64_4, PRIME32_2,
                                          PRIME64_5, PRIME32_1};
  for (size_t n = 0; n < nb_blocks; ++n) {
    XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
    XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
  }
  const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
  XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
  /* last stripe */
  constexpr size_t XXH_SECRET_LASTACC_START = 7;
  XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN,
                      secret + secretSize - XXH_STRIPE_LEN -
                          XXH_SECRET_LASTACC_START);
  /* converge into final hash */
  constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
  return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
                        (uint64_t)len * PRIME64_1);
}
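// Shape of the long-input (>240 byte) path: the input is consumed in blocks of
// nbStripesPerBlock 64-byte stripes, the accumulators are scrambled after each
// full block, and the final partial block plus one last stripe (keyed from a
// slightly shifted secret offset) are folded in before XXH3_mergeAccs condenses
// the eight 64-bit lanes into a single avalanched value.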
#if __has_builtin(__builtin_rotateleft32) &&                                   \
    __has_builtin(__builtin_rotateleft64)
#define XXH_rotl32 __builtin_rotateleft32
#define XXH_rotl64 __builtin_rotateleft64
#elif defined(_MSC_VER)
#define XXH_rotl32(x, r) _rotl(x, r)
#define XXH_rotl64(x, r) _rotl64(x, r)
#else
#define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif

#define XXH_mult32to64(x, y) ((uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y))
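// XXH_mult32to64 truncates both operands to 32 bits before widening to 64, so
// the compiler can emit a single 32x32->64 multiply rather than a full 64-bit one.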
static XXH128_hash_t XXH_mult64to128(uint64_t lhs, uint64_t rhs) {
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) &&         \
        defined(__SIZEOF_INT128__) ||                                          \
    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
  __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
  XXH128_hash_t r128;
  r128.low64 = (uint64_t)(product);
  r128.high64 = (uint64_t)(product >> 64);
  return r128;
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
#pragma intrinsic(_umul128)
  uint64_t product_high;
  uint64_t const product_low = _umul128(lhs, rhs, &product_high);
  XXH128_hash_t r128;
  r128.low64 = product_low;
  r128.high64 = product_high;
  return r128;
#elif defined(_M_ARM64) || defined(_M_ARM64EC)
#pragma intrinsic(__umulh)
  XXH128_hash_t r128;
  r128.low64 = lhs * rhs;
  r128.high64 = __umulh(lhs, rhs);
  return r128;
#else
  /* Portable fallback: assemble the 128-bit product from 32x32->64 partial
     products; the additions into `cross` cannot overflow. */
  uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
  uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
  return v64 ^ (v64 >> shift);
  uint8_t const c2 = input[len >> 1];
  uint8_t const c3 = input[len - 1];

  uint64_t const keyed = input_64 ^ bitflip;

  input_hi ^= bitfliph;
  if (sizeof(void *) < sizeof(uint64_t)) {
    m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) +
                   XXH_mult32to64((uint32_t)input_hi, PRIME32_2);
  acc.high64 += XXH3_mix16B(input_2, secret + 16, seed);
  acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed);
  acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed);
  acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed);
  acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed);

  XXH128_hash_t h128;
  h128.low64 = acc.low64 + acc.high64;
  for (i = 32; i < 160; i += 32) {
    acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32,
                        seed);
  }
  acc.low64 = XXH3_avalanche(acc.low64);
  acc.high64 = XXH3_avalanche(acc.high64);
  for (i = 160; i <= len; i += 32) {
  h128.low64 = acc.low64 + acc.high64;
LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t
XXH3_hashLong_128b(const uint8_t *input, size_t len, const uint8_t *secret,
                   size_t secretSize) {
  const size_t nbStripesPerBlock =
      (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
  const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
  const size_t nb_blocks = (len - 1) / block_len;
  alignas(16) uint64_t acc[XXH_ACC_NB] = {PRIME32_3, PRIME64_1, PRIME64_2,
                                          PRIME64_3, PRIME64_4, PRIME32_2,
                                          PRIME64_5, PRIME32_1};
  for (size_t n = 0; n < nb_blocks; ++n) {
    XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
    XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
  }
  const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
  XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
  constexpr size_t XXH_SECRET_LASTACC_START = 7;
  XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN,
                      secret + secretSize - XXH_STRIPE_LEN -
                          XXH_SECRET_LASTACC_START);
  static_assert(sizeof(acc) == 64);
  constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
  XXH128_hash_t h128;
  h128.low64 = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
                              (uint64_t)len * PRIME64_1);
  h128.high64 = XXH3_mergeAccs(
      acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
      ~((uint64_t)len * PRIME64_2));
  return h128;
}
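// Usage sketch (not part of xxhash.cpp): the ArrayRef overloads declared in
// llvm/Support/xxhash.h are the public entry points that dispatch into the
// length-bucketed helpers above; the header paths and the wrapper function
// below are illustrative assumptions, not code from this file.
#if 0
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/xxhash.h"

static void exampleHashes() {
  const uint8_t Buf[] = {'x', 'x', 'h', 'a', 's', 'h'};
  // 64-bit XXH3 of the buffer.
  uint64_t H64 = llvm::xxh3_64bits(llvm::ArrayRef<uint8_t>(Buf));
  // 128-bit XXH3; low64/high64 hold the two halves of the result.
  llvm::XXH128_hash_t H128 = llvm::xxh3_128bits(llvm::ArrayRef<uint8_t>(Buf));
  (void)H64;
  (void)H128.low64;
  (void)H128.high64;
}
#endif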