benchmarks/index_query.py (57 lines of code) (raw):

#
# Copyright 2022-2023 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from io import BytesIO
from itertools import product
from typing import Dict

import numpy as np
import voyager


class IndexQuerySuite:
    """Benchmarks for querying a ``voyager.Index`` with its own input vectors.

    One index is built per combination of the values in ``params``; the
    ``time_*`` methods issue single-threaded queries against it and
    ``track_recall`` reports how often a vector's top hit is itself.

    NOTE(review): the ``repeat`` / ``params`` / ``param_names`` attributes and
    the ``setup_cache`` / ``setup`` / ``time_*`` / ``track_*`` method names
    look like airspeed velocity (asv) conventions -- confirm against the
    benchmark runner configuration.
    """

    repeat = (1, 10, 30.0)

    # One list of candidate values per entry of ``param_names``, same order.
    params = (
        [256],
        [4096],
        [
            voyager.Space.Euclidean,
            voyager.Space.InnerProduct,
            voyager.Space.Cosine,
        ],
        [
            voyager.StorageDataType.E4M3,
            voyager.StorageDataType.Float8,
            voyager.StorageDataType.Float32,
        ],
        [24],
    )
    param_names = [
        "num_dimensions",
        "num_elements",
        "space",
        "storage_data_type",
        "ef_construction",
    ]

    def setup_cache(self) -> Dict:
        """Build and serialize one index for every parameter combination.

        Returns:
            A dict keyed by the parameter tuple (ordered as ``param_names``)
            whose values are ``(index_bytes, input_vectors)`` pairs:
            the result of ``Index.as_bytes()`` and the float array that was
            added to that index.
        """
        cache = {}
        for combo in product(*self.params):
            n_dims, n_items, metric, dtype, ef = combo

            # A freshly seeded generator per combination, so every index of
            # a given shape is built from the same vectors.
            rng = np.random.default_rng(seed=1234)
            unit_samples = rng.random((n_items, n_dims)).astype(np.float32)
            # Rescale the generator's unit-interval output to span -1 to 1.
            vectors = unit_samples * 2 - 1

            if dtype == voyager.StorageDataType.Float8:
                # Snap every value to a multiple of 1/127.
                # NOTE(review): presumably this matches the granularity of
                # Float8 storage so stored vectors equal the inputs --
                # confirm against the voyager documentation.
                vectors = np.round(vectors * 127) / 127

            built = voyager.Index(
                space=metric,
                num_dimensions=n_dims,
                M=20,
                ef_construction=ef,
                random_seed=4321,
                storage_data_type=dtype,
            )
            built.add_items(vectors, num_threads=1)

            cache[combo] = (built.as_bytes(), vectors)
        return cache

    def setup(
        self,
        cached_data: Dict,
        num_dimensions: int,
        num_elements: int,
        space: voyager.Space,
        storage_data_type: voyager.StorageDataType,
        ef_construction: float,
    ):
        """Load the pre-built index and its vectors for one parameter set.

        Args:
            cached_data: The dict returned by ``setup_cache``.
            num_dimensions: Dimensionality of the indexed vectors.
            num_elements: Number of vectors in the index.
            space: Distance space the index was built with.
            storage_data_type: Storage data type the index was built with.
            ef_construction: ``ef_construction`` value the index was built
                with. NOTE(review): annotated ``float`` although the only
                value listed in ``params`` is the int ``24``.

        Sets ``self.input_data`` and ``self.index`` for the benchmark methods.
        """
        cache_key = (
            num_dimensions,
            num_elements,
            space,
            storage_data_type,
            ef_construction,
        )
        serialized_index, vectors = cached_data[cache_key]
        self.input_data = vectors
        # Deserialize from an in-memory stream rather than reusing a live
        # index object.
        self.index = voyager.Index.load(BytesIO(serialized_index))

    def time_query_k_1(self, *_):
        """Time a single-threaded query of every input vector with ``k=1``."""
        self.index.query(self.input_data, k=1, num_threads=1)

    def time_query_k_20(self, *_):
        """Time a single-threaded query of every input vector with ``k=20``."""
        self.index.query(self.input_data, k=20, num_threads=1)

    def track_recall(self, *_):
        """Return the fraction of input vectors whose top hit is themselves.

        Each input vector is queried with ``k=1``; a match is counted when the
        first returned label equals the vector's row position.

        NOTE(review): this assumes ``add_items`` assigned labels ``0..n-1`` in
        row order -- confirm against the voyager documentation.
        """
        top_labels, _ignored = self.index.query(
            self.input_data, k=1, num_threads=1
        )
        total = len(self.input_data)
        expected_labels = np.arange(total)
        hit_count = np.sum(top_labels[:, 0] == expected_labels)
        return hit_count / total