in cpp/src/hnswalg.h [725:955]
/**
 * Load a serialized index from `inputStream` into this object.
 *
 * The stream is consumed in this order: a fixed header of POD fields, the
 * level-0 block (`cur_element_count * size_data_per_element_` bytes), and then
 * one record per element made of an `unsigned int` byte count followed by that
 * many bytes of link-list data (a count of 0 means the element has none).
 *
 * When the stream is seekable, the per-element records are walked once up
 * front to confirm that they fit in, and exactly fill, the remainder of the
 * stream. Non-seekable streams skip that pre-pass, so the same bounds are
 * enforced again while the records are unpacked.
 *
 * @param inputStream     stream to read from; the first value read is the
 *                        level-0 offset.
 * @param s               space supplying the data size, distance function and
 *                        distance-function parameter.
 * @param max_elements_i  requested capacity; when it is smaller than the number
 *                        of stored elements, the capacity recorded in the
 *                        stream is used instead.
 *
 * @throws std::domain_error  if the level-0 offset exceeds the stream length,
 *                            or the per-element size does not match what `s`
 *                            implies.
 * @throws std::runtime_error if the data is otherwise inconsistent, the stream
 *                            ends early, or an allocation fails.
 */
void loadIndex(std::shared_ptr<InputStream> inputStream,
               Space<dist_t, data_t> *s, size_t max_elements_i = 0) {
  // The total length is only queried for seekable streams; it stays 0
  // otherwise, which disables the length-based checks below.
  size_t totalFileSize = 0;
  if (inputStream->isSeekable()) {
    totalFileSize = inputStream->getTotalLength();
  }

  readBinaryPOD(inputStream, offsetLevel0_);
  if (totalFileSize > 0 && offsetLevel0_ > totalFileSize) {
    throw std::domain_error("Index appears to contain corrupted data; level "
                            "0 offset parameter (" +
                            std::to_string(offsetLevel0_) +
                            ") exceeded size of index file (" +
                            std::to_string(totalFileSize) + ").");
  }

  readBinaryPOD(inputStream, max_elements_);
  readBinaryPOD(inputStream, cur_element_count);

  // Honour the caller's capacity unless it cannot hold the stored elements,
  // in which case fall back to the capacity recorded in the stream.
  size_t max_elements = max_elements_i;
  if (max_elements < cur_element_count)
    max_elements = max_elements_;
  max_elements_ = max_elements;

  // Every buffer below is sized from max_elements but filled with
  // cur_element_count entries, so a stored capacity smaller than the stored
  // element count would overflow those buffers.
  if (cur_element_count > max_elements) {
    throw std::runtime_error(
        "Index seems to be corrupted or unsupported. Index contains " +
        std::to_string(cur_element_count) +
        " elements, but its maximum capacity is only " +
        std::to_string(max_elements) + " elements.");
  }

  readBinaryPOD(inputStream, size_data_per_element_);
  readBinaryPOD(inputStream, label_offset_);
  readBinaryPOD(inputStream, offsetData_);
  readBinaryPOD(inputStream, maxlevel_);
  readBinaryPOD(inputStream, enterpoint_node_);

  // NOTE(review): this also rejects cur_element_count == 0 for any entry
  // point value, which makes the (tableint)-1 test near the end of this
  // function unreachable. Confirm whether empty indices are meant to load.
  if (enterpoint_node_ >= cur_element_count) {
    throw std::runtime_error(
        "Index seems to be corrupted or unsupported. "
        "Entry point into HNSW data structure was at element index " +
        std::to_string(enterpoint_node_) + ", but only " +
        std::to_string(cur_element_count) +
        " elements are present in the index.");
  }

  readBinaryPOD(inputStream, maxM_);
  readBinaryPOD(inputStream, maxM0_);
  readBinaryPOD(inputStream, M_);
  readBinaryPOD(inputStream, mult_);
  readBinaryPOD(inputStream, ef_construction_);

  data_size_ = s->get_data_size();
  fstdistfunc_ = s->get_dist_func();
  dist_func_param_ = s->get_dist_func_param();

  size_links_per_element_ =
      maxM_ * sizeof(tableint) + sizeof(linklistsizeint);
  size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);

  // The per-element size stored in the stream must agree with the layout
  // implied by the space and the link parameters that were just read.
  size_t expected_size_per_element =
      size_links_level0_ + data_size_ + sizeof(labeltype);
  if (size_data_per_element_ != expected_size_per_element) {
    throw std::domain_error(
        "Storage data type does not match the index data being loaded; "
        "expected " +
        std::to_string(expected_size_per_element) +
        " bytes per element, but loaded data contains " +
        std::to_string(size_data_per_element_) +
        " bytes per element. Data being loaded might not be a Voyager index, "
        "may be corrupt, or may be using a different storage data type.");
  }

  // Guard the size arithmetic used for the allocations and reads below.
  // size_data_per_element_ is non-zero here because it equals
  // expected_size_per_element, which includes sizeof(labeltype). Since
  // cur_element_count <= max_elements, this covers its products as well.
  const size_t largestSize = static_cast<size_t>(-1);
  if (max_elements > largestSize / size_data_per_element_ ||
      max_elements > largestSize / sizeof(void *)) {
    throw std::runtime_error(
        "Index seems to be corrupted or unsupported. Capacity of " +
        std::to_string(max_elements) + " elements at " +
        std::to_string(size_data_per_element_) +
        " bytes per element is too large to be addressed.");
  }

  // Remember where the level-0 block starts so that the validation pass
  // below can rewind to it.
  long long position = inputStream->getPosition();

  if (inputStream->isSeekable()) {
    // Skip the level-0 block, then hop from record to record checking that
    // each one lies inside the stream.
    inputStream->advanceBy(cur_element_count * size_data_per_element_);
    for (size_t i = 0; i < cur_element_count; i++) {
      if (inputStream->getPosition() < 0 ||
          inputStream->getPosition() >=
              static_cast<long long>(totalFileSize)) {
        throw std::runtime_error(
            "Index seems to be corrupted or unsupported. Seeked to " +
            std::to_string(position +
                           (cur_element_count * size_data_per_element_) +
                           (sizeof(unsigned int) * i)) +
            " bytes to read linked list, but resulting stream position was " +
            std::to_string(inputStream->getPosition()) +
            " (of total file size " + std::to_string(totalFileSize) +
            " bytes).");
      }

      unsigned int linkListSize;
      readBinaryPOD(inputStream, linkListSize);

      if (linkListSize != 0) {
        if (static_cast<size_t>(inputStream->getPosition()) + linkListSize >
            totalFileSize) {
          throw std::runtime_error(
              "Index seems to be corrupted or unsupported. Advancing to the "
              "next linked list requires " +
              std::to_string(linkListSize) +
              " additional bytes (from position " +
              std::to_string(inputStream->getPosition()) +
              "), but index data only has " + std::to_string(totalFileSize) +
              " bytes in total.");
        }
        inputStream->advanceBy(linkListSize);
      }
    }

    // The records must end exactly at the end of the stream.
    if (inputStream->getPosition() != static_cast<long long>(totalFileSize))
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. After reading all "
          "linked lists, extra data remained at the end of the index.");

    inputStream->setPosition(position);
  }

  data_level0_memory_ =
      static_cast<char *>(malloc(max_elements * size_data_per_element_));
  if (data_level0_memory_ == nullptr) {
    throw std::runtime_error(
        "Not enough memory: loadIndex failed to allocate level0 (" +
        std::to_string(max_elements * size_data_per_element_) + " bytes)");
  }

  {
    size_t bytes_to_read = cur_element_count * size_data_per_element_;
    size_t bytes_read = inputStream->read(data_level0_memory_, bytes_to_read);
    if (bytes_read != bytes_to_read) {
      throw std::runtime_error("Tried to read " +
                               std::to_string(bytes_to_read) +
                               " bytes from stream, but only received " +
                               std::to_string(bytes_read) + " bytes!");
    }
  }

  linkLists_ = static_cast<char **>(malloc(sizeof(void *) * max_elements));
  if (linkLists_ == nullptr)
    throw std::runtime_error(
        "Not enough memory: loadIndex failed to allocate linklists (" +
        std::to_string(sizeof(void *) * max_elements) + " bytes)");

  // Read the rest of the stream into a growable buffer. The starting size is
  // non-zero (max_elements >= cur_element_count >= 1 after the checks above),
  // so doubling always makes progress.
  size_t linkListBufferSize = sizeof(void *) * max_elements;
  std::vector<char> linkListBuffer(linkListBufferSize);

  {
    size_t bytes_read = 0;
    while (true) {
      long long bytes_to_read = linkListBuffer.size() - bytes_read;
      long long bytes_read_this_iteration = inputStream->read(
          linkListBuffer.data() + bytes_read, bytes_to_read);

      if (bytes_read_this_iteration > 0) {
        bytes_read += bytes_read_this_iteration;
      }

      if (bytes_read_this_iteration == bytes_to_read) {
        // The link list data will usually be smaller than the buffer we've
        // allocated, but in case it's not, enlarge the buffer and keep
        // reading:
        try {
          linkListBuffer.resize(linkListBuffer.size() * 2);
        } catch (std::exception const &) {
          throw std::runtime_error(
              "Failed to resize linked list buffer to "
              "double its previous size (from " +
              std::to_string(linkListBuffer.size()) + " to " +
              std::to_string(linkListBuffer.size() * 2) + ")");
        }
      } else {
        // We've hit the end of the stream (as we read fewer bytes than asked
        // for) so stop reading.
        try {
          linkListBuffer.resize(bytes_read);
        } catch (std::exception const &) {
          throw std::runtime_error("Failed to resize linked list buffer to "
                                   "the number of bytes read (" +
                                   std::to_string(bytes_read) + ")");
        }
        break;
      }
    }
  }

  if (!search_only_) {
    std::vector<std::mutex>(max_elements).swap(link_list_locks_);
    std::vector<std::mutex>(max_update_element_locks)
        .swap(link_list_update_locks_);
  }

  visited_list_pool_ = new VisitedListPool(1, max_elements);

  element_levels_ = std::vector<int>(max_elements);
  revSize_ = 1.0 / mult_;
  ef_ = 10;

  // Unpack one record per element from the buffered data. Invariant:
  // indexInLinkListBuffer <= linkListBuffer.size(), so the subtractions in
  // the bounds checks below cannot wrap.
  size_t indexInLinkListBuffer = 0;
  for (size_t i = 0; i < cur_element_count; i++) {
    if (!search_only_)
      label_lookup_[getExternalLabel(i)] = i;

    unsigned int linkListSize = 0;
    if (linkListBuffer.size() - indexInLinkListBuffer < sizeof(linkListSize)) {
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. Linked list data "
          "ended after " +
          std::to_string(linkListBuffer.size()) +
          " bytes, before the linked list size of element " +
          std::to_string(i) + " could be read.");
    }
    // memcpy rather than a cast-and-dereference: the offset into the byte
    // buffer is arbitrary, so a direct load could be misaligned.
    std::memcpy(&linkListSize, linkListBuffer.data() + indexInLinkListBuffer,
                sizeof(linkListSize));
    indexInLinkListBuffer += sizeof(linkListSize);

    if (linkListSize == 0) {
      element_levels_[i] = 0;
      linkLists_[i] = nullptr;
    } else {
      if (linkListBuffer.size() - indexInLinkListBuffer < linkListSize) {
        throw std::runtime_error(
            "Index seems to be corrupted or unsupported. Linked list of "
            "element " +
            std::to_string(i) + " requires " + std::to_string(linkListSize) +
            " bytes, but only " +
            std::to_string(linkListBuffer.size() - indexInLinkListBuffer) +
            " bytes of linked list data remain.");
      }

      element_levels_[i] = linkListSize / size_links_per_element_;
      linkLists_[i] = static_cast<char *>(malloc(linkListSize));
      if (linkLists_[i] == nullptr)
        throw std::runtime_error(
            "Not enough memory: loadIndex failed to allocate linklist");

      std::memcpy(linkLists_[i],
                  linkListBuffer.data() + indexInLinkListBuffer,
                  linkListSize);
      indexInLinkListBuffer += linkListSize;
    }
  }

  // A non-zero entry point is expected to carry link-list data of its own.
  if (enterpoint_node_ > 0 && enterpoint_node_ != static_cast<tableint>(-1) &&
      !linkLists_[enterpoint_node_]) {
    throw std::runtime_error(
        "Index seems to be corrupted or unsupported. "
        "Entry point into HNSW data structure was at element index " +
        std::to_string(enterpoint_node_) +
        ", but no linked list was present at that index.");
  }

  // Count the elements that are flagged as deleted in the loaded data.
  for (size_t i = 0; i < cur_element_count; i++) {
    if (isMarkedDeleted(i))
      num_deleted_ += 1;
  }
}