in include/hash/xxhash.h [3230:3263]
/*
 * XXH3_initCustomSecret_avx2():
 * Builds a seeded secret from the default secret `XXH3_kSecret` using AVX2.
 *
 * The default secret is processed as six 256-bit vectors, each holding four
 * 64-bit lanes. `seed64` is added to lanes 0 and 2 and subtracted from lanes
 * 1 and 3 (wrapping 64-bit arithmetic); the result is stored to `customSecret`.
 *
 * @param customSecret  Destination buffer. XXH_SECRET_DEFAULT_SIZE bytes
 *                      (6 * sizeof(__m256i)) are written. It is accessed
 *                      through a `__m256i*`, so it is expected to be 32-byte
 *                      aligned.
 * @param seed64        Seed combined with every 64-bit lane of the secret.
 */
XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
{
    /* The manually unrolled stores below rely on the secret being exactly six __m256i wide. */
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
    /* XXH_writeLE64 is referenced but never called here; presumably this only
     * silences an unused-function warning on this code path. */
    (void)(&XXH_writeLE64);
    XXH_PREFETCH(customSecret);
    {
        /*
         * Negate in the unsigned domain, then convert: `-(xxh_i64)seed64` is
         * signed overflow (undefined behavior) when seed64 == 0x8000000000000000,
         * whereas `0U - seed64` wraps with a well-defined result and yields the
         * same bit pattern for every other seed.
         */
        xxh_i64 const posSeed = (xxh_i64)seed64;
        xxh_i64 const negSeed = (xxh_i64)(0U - seed64);
        /* _mm256_set_epi64x takes lanes from highest to lowest: lane 0 = +seed, lane 1 = -seed, ... */
        __m256i const seed = _mm256_set_epi64x(negSeed, posSeed, negSeed, posSeed);
        XXH_ALIGN(64) const __m256i* const src = (const __m256i*) XXH3_kSecret;
        XXH_ALIGN(64) __m256i* dest = ( __m256i*) customSecret;
#       if defined(__GNUC__) || defined(__clang__)
        /*
         * On GCC & Clang, marking 'dest' as modified causes the compiler to:
         *   - not extract the secret from SSE registers in the internal loop
         *   - use fewer common registers, and avoid spilling them to the stack
         * The asm hack causes Clang to assume that XXH3_kSecretPtr aliases with
         * customSecret, and on aarch64, this prevented LDP from merging two
         * loads together for free. Putting the loads together before the stores
         * properly generates LDP.
         * NOTE(review): the aarch64/LDP remark appears inherited from a
         * non-AVX2 variant of this routine - confirm whether it applies here.
         */
        __asm__("" : "+r" (dest));
#       endif
        /* GCC -O2 does not unroll this loop on its own, so it is unrolled manually. */
        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
    }
}