in src/annoylib.h [289:309]
inline float dot<float>(const float* x, const float *y, int f) {
float result = 0;
if (f > 15) {
__m512 d = _mm512_setzero_ps();
for (; f > 15; f -= 16) {
//AVX512F includes FMA
d = _mm512_fmadd_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y), d);
x += 16;
y += 16;
}
// Sum all floats in dot register.
result += _mm512_reduce_add_ps(d);
}
// Don't forget the remaining values.
for (; f > 0; f--) {
result += *x * *y;
x++;
y++;
}
return result;
}