in src/annoylib.h [662:694]
static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
// This uses a method from Microsoft Research for transforming inner product spaces to cosine/angular-compatible spaces.
// (Bachrach et al., 2014, see https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf)
// Step one: compute the norm of each vector and store that in its extra dimension (f-1)
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr<S, Node>(nodes, _s, i);
T d = dot(node->v, node->v, f);
T norm = d < 0 ? 0 : sqrt(d);
node->dot_factor = norm;
node->built = false;
}
// Step two: find the maximum norm
T max_norm = 0;
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr<S, Node>(nodes, _s, i);
if (node->dot_factor > max_norm) {
max_norm = node->dot_factor;
}
}
// Step three: set each vector's extra dimension to sqrt(max_norm^2 - norm^2)
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr<S, Node>(nodes, _s, i);
T node_norm = node->dot_factor;
T squared_norm_diff = pow(max_norm, static_cast<T>(2.0)) - pow(node_norm, static_cast<T>(2.0));
T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff);
node->norm = pow(max_norm, static_cast<T>(2.0));
node->dot_factor = dot_factor;
}
}