void loadIndex()

in cpp/src/hnswalg.h [725:955]


  /**
   * Load a serialized index from `inputStream` into this object.
   *
   * The stream is consumed in this order:
   *   1. Header fields: level-0 offset, maximum element count, current
   *      element count, bytes per element, label offset, data offset, maximum
   *      level, entry point, maxM, maxM0, M, mult and ef_construction.
   *   2. `cur_element_count * size_data_per_element_` bytes of level-0 data.
   *   3. One length-prefixed link list per element: an `unsigned int` byte
   *      count followed by that many bytes (a count of 0 means the element
   *      has no link list and is assigned level 0).
   *
   * When the stream is seekable, the link list section is walked once before
   * anything is allocated, to verify that it fits in - and exactly consumes -
   * the rest of the stream. Non-seekable streams cannot be pre-validated, so
   * every read from the buffered link list data is bounds-checked while
   * parsing instead.
   *
   * @param inputStream    Stream to read the index from.
   * @param s              Space supplying the data size, the distance
   *                       function and the distance function parameter.
   * @param max_elements_i Capacity to allocate. If it is smaller than the
   *                       number of elements stored in the index, the
   *                       capacity recorded in the index is used instead.
   *
   * @throws std::domain_error  if the level-0 offset exceeds the stream
   *                            length, or if the stored bytes-per-element
   *                            does not match what `s` implies.
   * @throws std::runtime_error if the data is otherwise inconsistent, a read
   *                            comes up short, or an allocation fails.
   */
  void loadIndex(std::shared_ptr<InputStream> inputStream,
                 Space<dist_t, data_t> *s, size_t max_elements_i = 0) {
    // A total length is only known for seekable streams; 0 means "unknown".
    size_t totalFileSize = 0;
    if (inputStream->isSeekable()) {
      totalFileSize = inputStream->getTotalLength();
    }
    readBinaryPOD(inputStream, offsetLevel0_);
    if (totalFileSize > 0 && offsetLevel0_ > totalFileSize) {
      throw std::domain_error("Index appears to contain corrupted data; level "
                              "0 offset parameter (" +
                              std::to_string(offsetLevel0_) +
                              ") exceeded size of index file (" +
                              std::to_string(totalFileSize) + ").");
    }

    readBinaryPOD(inputStream, max_elements_);
    readBinaryPOD(inputStream, cur_element_count);

    // Prefer the caller's capacity, falling back to the stored capacity when
    // the caller's value cannot hold every stored element.
    size_t max_elements = max_elements_i;
    if (max_elements < cur_element_count)
      max_elements = max_elements_;
    // The stored capacity comes from the stream and may itself be too small.
    // Every buffer below is sized from max_elements but filled with
    // cur_element_count entries, so reject this before allocating anything.
    if (max_elements < cur_element_count) {
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. Index claims to "
          "contain " +
          std::to_string(cur_element_count) +
          " elements, but only has capacity for " +
          std::to_string(max_elements) + " elements.");
    }
    max_elements_ = max_elements;
    readBinaryPOD(inputStream, size_data_per_element_);
    readBinaryPOD(inputStream, label_offset_);
    readBinaryPOD(inputStream, offsetData_);
    readBinaryPOD(inputStream, maxlevel_);
    readBinaryPOD(inputStream, enterpoint_node_);

    // NOTE(review): this also rejects an entry point of (tableint)-1, which
    // the check performed after the link lists are loaded treats specially -
    // confirm whether an index with no elements is expected to be loadable.
    if (enterpoint_node_ >= cur_element_count) {
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. "
          "Entry point into HNSW data structure was at element index " +
          std::to_string(enterpoint_node_) + ", but only " +
          std::to_string(cur_element_count) +
          " elements are present in the index.");
    }

    readBinaryPOD(inputStream, maxM_);
    readBinaryPOD(inputStream, maxM0_);
    readBinaryPOD(inputStream, M_);
    readBinaryPOD(inputStream, mult_);
    readBinaryPOD(inputStream, ef_construction_);

    data_size_ = s->get_data_size();
    fstdistfunc_ = s->get_dist_func();
    dist_func_param_ = s->get_dist_func_param();

    size_links_per_element_ =
        maxM_ * sizeof(tableint) + sizeof(linklistsizeint);

    size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);

    // The per-element size stored in the stream must agree with the size
    // implied by the space and the stored maxM0.
    size_t expected_size_per_element =
        size_links_level0_ + data_size_ + sizeof(labeltype);
    if (size_data_per_element_ != expected_size_per_element) {
      throw std::domain_error(
          "Storage data type does not match the index data being loaded; "
          "expected " +
          std::to_string(expected_size_per_element) +
          " bytes per element, but loaded data contains " +
          std::to_string(size_data_per_element_) +
          " bytes per element. Data being loaded might not be a Voyager index, "
          "may be corrupt, or may be using a different storage data type.");
    }

    // Guard the size computations below against wrap-around.
    // size_data_per_element_ is non-zero here: it equals the expected size,
    // which includes sizeof(labeltype). Since cur_element_count is no larger
    // than max_elements, its products are covered by the same check.
    const size_t maxRepresentableSize = static_cast<size_t>(-1);
    if (max_elements > maxRepresentableSize / size_data_per_element_ ||
        max_elements > maxRepresentableSize / sizeof(void *)) {
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. Capacity of " +
          std::to_string(max_elements) +
          " elements is too large to allocate.");
    }

    // Remember where the level-0 data starts so we can return here after the
    // validation pass.
    long long position = inputStream->getPosition();

    if (inputStream->isSeekable()) {
      // Validation pass: skip the level-0 data, then hop from one link list
      // size prefix to the next, checking every hop stays inside the stream.
      inputStream->advanceBy(cur_element_count * size_data_per_element_);
      for (size_t i = 0; i < cur_element_count; i++) {
        if (inputStream->getPosition() < 0 ||
            inputStream->getPosition() >= (long long)totalFileSize) {
          throw std::runtime_error(
              "Index seems to be corrupted or unsupported. Seeked to " +
              std::to_string(position +
                             (cur_element_count * size_data_per_element_) +
                             (sizeof(unsigned int) * i)) +
              " bytes to read linked list, but resulting stream position was " +
              std::to_string(inputStream->getPosition()) +
              " (of total file size " + std::to_string(totalFileSize) +
              " bytes).");
        }

        unsigned int linkListSize;
        readBinaryPOD(inputStream, linkListSize);
        if (linkListSize != 0) {
          if ((size_t)inputStream->getPosition() + linkListSize >
              totalFileSize) {
            throw std::runtime_error(
                "Index seems to be corrupted or unsupported. Advancing to the "
                "next linked list requires " +
                std::to_string(linkListSize) +
                " additional bytes (from position " +
                std::to_string(inputStream->getPosition()) +
                "), but index data only has " + std::to_string(totalFileSize) +
                " bytes in total.");
          }
          inputStream->advanceBy(linkListSize);
        }
      }

      // The last link list must end exactly at the end of the stream.
      if (inputStream->getPosition() != (long long)totalFileSize)
        throw std::runtime_error(
            "Index seems to be corrupted or unsupported. After reading all "
            "linked lists, extra data remained at the end of the index.");

      // Rewind to the level-0 data for the real read.
      inputStream->setPosition(position);
    }

    data_level0_memory_ = (char *)malloc(max_elements * size_data_per_element_);
    if (data_level0_memory_ == nullptr) {
      throw std::runtime_error(
          "Not enough memory: loadIndex failed to allocate level0 (" +
          std::to_string(max_elements * size_data_per_element_) + " bytes)");
    }

    {
      size_t bytes_to_read = cur_element_count * size_data_per_element_;
      size_t bytes_read = inputStream->read(data_level0_memory_, bytes_to_read);
      if (bytes_read != bytes_to_read) {
        throw std::runtime_error("Tried to read " +
                                 std::to_string(bytes_to_read) +
                                 " bytes from stream, but only received " +
                                 std::to_string(bytes_read) + " bytes!");
      }
    }

    linkLists_ = (char **)malloc(sizeof(void *) * max_elements);
    if (linkLists_ == nullptr)
      throw std::runtime_error(
          "Not enough memory: loadIndex failed to allocate linklists (" +
          std::to_string(sizeof(void *) * max_elements) + " bytes)");

    // Read the rest of the stream (all link list data) into one buffer. The
    // total size is not known up front for non-seekable streams, so start
    // with a guess and double the buffer whenever it fills up.
    size_t linkListBufferSize = sizeof(void *) * max_elements;
    std::vector<char> linkListBuffer(linkListBufferSize);
    {
      size_t bytes_read = 0;

      while (true) {
        long long bytes_to_read = linkListBuffer.size() - bytes_read;

        long long bytes_read_this_iteration = inputStream->read(
            linkListBuffer.data() + bytes_read, bytes_to_read);

        if (bytes_read_this_iteration > 0) {
          bytes_read += bytes_read_this_iteration;
        }

        if (bytes_read_this_iteration == bytes_to_read) {
          // The link list data will usually be smaller than the buffer we've
          // allocated, but in case it's not, enlarge the buffer and keep
          // reading:
          try {
            linkListBuffer.resize(linkListBuffer.size() * 2);
          } catch (std::exception const &) {
            throw std::runtime_error(
                "Failed to resize linked list buffer to "
                "double its previous size (from " +
                std::to_string(linkListBuffer.size()) + " to " +
                std::to_string(linkListBuffer.size() * 2) + ")");
          }
        } else {
          // We've hit the end of the stream (as we read fewer bytes than asked
          // for) so stop reading.
          try {
            linkListBuffer.resize(bytes_read);
          } catch (std::exception const &) {
            throw std::runtime_error("Failed to resize linked list buffer to "
                                     "the number of bytes read (" +
                                     std::to_string(bytes_read) + ")");
          }
          break;
        }
      }
    }

    if (!search_only_) {
      std::vector<std::mutex>(max_elements).swap(link_list_locks_);
      std::vector<std::mutex>(max_update_element_locks)
          .swap(link_list_update_locks_);
    }

    visited_list_pool_ = new VisitedListPool(1, max_elements);

    element_levels_ = std::vector<int>(max_elements);
    revSize_ = 1.0 / mult_;
    ef_ = 10;

    // Split the buffered link list data into one allocation per element.
    // indexInLinkListBuffer never exceeds linkListBuffer.size(), so the
    // "bytes remaining" subtractions below cannot underflow.
    size_t indexInLinkListBuffer = 0;
    for (size_t i = 0; i < cur_element_count; i++) {
      if (!search_only_)
        label_lookup_[getExternalLabel(i)] = i;

      unsigned int linkListSize;
      if (linkListBuffer.size() - indexInLinkListBuffer <
          sizeof(linkListSize)) {
        throw std::runtime_error(
            "Index seems to be corrupted or unsupported. Expected the size of "
            "the linked list for element " +
            std::to_string(i) + " at offset " +
            std::to_string(indexInLinkListBuffer) +
            " of the linked list data, but only " +
            std::to_string(linkListBuffer.size()) +
            " bytes of linked list data were read.");
      }
      // The size prefix may sit at any byte offset, so copy it out rather
      // than dereferencing a (possibly misaligned) casted pointer.
      std::memcpy(&linkListSize,
                  linkListBuffer.data() + indexInLinkListBuffer,
                  sizeof(linkListSize));
      indexInLinkListBuffer += sizeof(linkListSize);

      if (linkListSize == 0) {
        element_levels_[i] = 0;

        linkLists_[i] = nullptr;
      } else {
        if (linkListSize > linkListBuffer.size() - indexInLinkListBuffer) {
          throw std::runtime_error(
              "Index seems to be corrupted or unsupported. Linked list for "
              "element " +
              std::to_string(i) + " requires " +
              std::to_string(linkListSize) + " bytes at offset " +
              std::to_string(indexInLinkListBuffer) +
              " of the linked list data, but only " +
              std::to_string(linkListBuffer.size()) +
              " bytes of linked list data were read.");
        }

        element_levels_[i] = linkListSize / size_links_per_element_;
        linkLists_[i] = (char *)malloc(linkListSize);
        if (linkLists_[i] == nullptr)
          throw std::runtime_error(
              "Not enough memory: loadIndex failed to allocate linklist");

        std::memcpy(linkLists_[i],
                    (linkListBuffer.data() + indexInLinkListBuffer),
                    linkListSize);
        indexInLinkListBuffer += linkListSize;
      }
    }

    if (enterpoint_node_ > 0 && enterpoint_node_ != (tableint)-1 &&
        !linkLists_[enterpoint_node_]) {
      throw std::runtime_error(
          "Index seems to be corrupted or unsupported. "
          "Entry point into HNSW data structure was at element index " +
          std::to_string(enterpoint_node_) +
          ", but no linked list was present at that index.");
    }

    // Count the loaded elements that are flagged as deleted.
    for (size_t i = 0; i < cur_element_count; i++) {
      if (isMarkedDeleted(i))
        num_deleted_ += 1;
    }
  }