in cpp/src/hnswalg.h [466:598]
tableint mutuallyConnectNewElement(
const data_t *data_point, tableint cur_c,
std::priority_queue<std::pair<dist_t, tableint>,
std::vector<std::pair<dist_t, tableint>>,
CompareByFirst> &top_candidates,
int level, bool isUpdate) {
size_t Mcurmax = level ? maxM_ : maxM0_;
getNeighborsByHeuristic2(top_candidates, M_);
if (top_candidates.size() > M_)
throw std::runtime_error(
"Should be not be more than M_ candidates returned by the heuristic");
std::vector<tableint> selectedNeighbors;
selectedNeighbors.reserve(M_);
while (top_candidates.size() > 0) {
selectedNeighbors.push_back(top_candidates.top().second);
top_candidates.pop();
}
tableint next_closest_entry_point = selectedNeighbors.back();
{
linklistsizeint *ll_cur;
if (level == 0)
ll_cur = get_linklist0(cur_c);
else
ll_cur = get_linklist(cur_c, level);
if (*ll_cur && !isUpdate) {
throw std::runtime_error(
"The newly inserted element should have blank link list");
}
setListCount(ll_cur, selectedNeighbors.size());
tableint *data = (tableint *)(ll_cur + 1);
for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) {
if (data[idx] && !isUpdate)
throw std::runtime_error("Possible memory corruption");
if (level > element_levels_[selectedNeighbors[idx]])
throw std::runtime_error(
"Trying to make a link on a non-existent level");
data[idx] = selectedNeighbors[idx];
}
}
for (size_t idx = 0; idx < selectedNeighbors.size(); idx++) {
std::unique_lock<std::mutex> lock(
link_list_locks_[selectedNeighbors[idx]]);
linklistsizeint *ll_other;
if (level == 0)
ll_other = get_linklist0(selectedNeighbors[idx]);
else
ll_other = get_linklist(selectedNeighbors[idx], level);
size_t sz_link_list_other = getListCount(ll_other);
if (sz_link_list_other > Mcurmax)
throw std::runtime_error("Bad value of sz_link_list_other");
if (selectedNeighbors[idx] == cur_c)
throw std::runtime_error("Trying to connect an element to itself");
if (level > element_levels_[selectedNeighbors[idx]])
throw std::runtime_error(
"Trying to make a link on a non-existent level");
tableint *data = (tableint *)(ll_other + 1);
bool is_cur_c_present = false;
if (isUpdate) {
for (size_t j = 0; j < sz_link_list_other; j++) {
if (data[j] == cur_c) {
is_cur_c_present = true;
break;
}
}
}
// If cur_c is already present in the neighboring connections of
// `selectedNeighbors[idx]` then no need to modify any connections or run
// the heuristics.
if (!is_cur_c_present) {
if (sz_link_list_other < Mcurmax) {
data[sz_link_list_other] = cur_c;
setListCount(ll_other, sz_link_list_other + 1);
} else {
// finding the "weakest" element to replace it with the new one
dist_t d_max = fstdistfunc_(
getDataByInternalId(cur_c),
getDataByInternalId(selectedNeighbors[idx]), dist_func_param_);
// Heuristic:
std::priority_queue<std::pair<dist_t, tableint>,
std::vector<std::pair<dist_t, tableint>>,
CompareByFirst>
candidates;
candidates.emplace(d_max, cur_c);
for (size_t j = 0; j < sz_link_list_other; j++) {
candidates.emplace(
fstdistfunc_(getDataByInternalId(data[j]),
getDataByInternalId(selectedNeighbors[idx]),
dist_func_param_),
data[j]);
}
getNeighborsByHeuristic2(candidates, Mcurmax);
int indx = 0;
while (candidates.size() > 0) {
data[indx] = candidates.top().second;
candidates.pop();
indx++;
}
setListCount(ll_other, indx);
// Nearest K:
/*int indx = -1;
for (int j = 0; j < sz_link_list_other; j++) {
dist_t d = fstdistfunc_(getDataByInternalId(data[j]),
getDataByInternalId(rez[idx]), dist_func_param_); if (d > d_max) {
indx = j;
d_max = d;
}
}
if (indx >= 0) {
data[indx] = cur_c;
} */
}
}
}
return next_closest_entry_point;
}