diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index 92b269f4f..1d6f0e7c6 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -22,50 +22,6 @@ namespace po = boost::program_options; -template -int build_in_memory_index(const diskann::Metric &metric, const std::string &data_path, const uint32_t R, - const uint32_t L, const float alpha, const std::string &save_path, const uint32_t num_threads, - const bool use_pq_build, const size_t num_pq_bytes, const bool use_opq, - const std::string &label_file, const std::string &universal_label, const uint32_t Lf) -{ - diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder(L, R) - .with_filter_list_size(Lf) - .with_alpha(alpha) - .with_saturate_graph(false) - .with_num_threads(num_threads) - .build(); - std::string labels_file_to_use = save_path + "_label_formatted.txt"; - std::string mem_labels_int_map_file = save_path + "_labels_map.txt"; - - size_t data_num, data_dim; - diskann::get_bin_metadata(data_path, data_num, data_dim); - - diskann::Index index(metric, data_dim, data_num, false, false, false, use_pq_build, num_pq_bytes, - use_opq); - auto s = std::chrono::high_resolution_clock::now(); - if (label_file == "") - { - index.build(data_path.c_str(), data_num, paras); - } - else - { - convert_labels_string_to_int(label_file, labels_file_to_use, mem_labels_int_map_file, universal_label); - if (universal_label != "") - { - LabelT unv_label_as_num = 0; - index.set_universal_label(unv_label_as_num); - } - index.build_filtered_index(data_path.c_str(), labels_file_to_use, data_num, paras); - } - std::chrono::duration diff = std::chrono::high_resolution_clock::now() - s; - - std::cout << "Indexing time: " << diff.count() << "\n"; - index.save(save_path.c_str()); - if (label_file != "") - std::remove(labels_file_to_use.c_str()); - return 0; -} - int main(int argc, char **argv) { std::string data_type, dist_fn, data_path, index_path_prefix, label_file, 
universal_label, label_type; diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index 80481f8b0..069651781 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -285,7 +285,7 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p auto pruning_index_timer = std::chrono::high_resolution_clock::now(); diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, false, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false); // not searching this index, set search_l to 0 index.load(full_index_path_prefix.c_str(), num_threads, 1); diff --git a/apps/test_insert_deletes_consolidate.cpp b/apps/test_insert_deletes_consolidate.cpp index 700f4d7b6..8999688ea 100644 --- a/apps/test_insert_deletes_consolidate.cpp +++ b/apps/test_insert_deletes_consolidate.cpp @@ -152,14 +152,14 @@ void build_incremental_index(const std::string &data_path, diskann::IndexWritePa using TagT = uint32_t; auto data_type = diskann_type_to_name(); auto tag_type = diskann_type_to_name(); + auto index_search_params = diskann::IndexSearchParams(params.search_list_size, params.num_threads); diskann::IndexConfig index_config = diskann::IndexConfigBuilder() .with_metric(diskann::L2) .with_dimension(dim) .with_max_points(max_points_to_insert) .is_dynamic_index(true) .with_index_write_params(params) - .with_search_threads(params.num_threads) - .with_initial_search_list_size(params.search_list_size) + .with_index_search_params(index_search_params) .with_data_type(data_type) .with_tag_type(tag_type) .with_data_load_store_strategy(diskann::MEMORY) diff --git a/apps/test_streaming_scenario.cpp b/apps/test_streaming_scenario.cpp index 55e4e61cf..c40ee251e 100644 --- a/apps/test_streaming_scenario.cpp +++ b/apps/test_streaming_scenario.cpp @@ -186,6 +186,7 @@ void 
build_incremental_index(const std::string &data_path, const uint32_t L, con .with_num_frozen_points(num_start_pts) .build(); + auto index_search_params = diskann::IndexSearchParams(L, insert_threads); diskann::IndexWriteParameters delete_params = diskann::IndexWriteParametersBuilder(L, R) .with_max_occlusion_size(C) .with_alpha(alpha) @@ -200,7 +201,6 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con diskann::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" << std::endl; aligned_dim = ROUND_UP(dim, 8); - auto index_config = diskann::IndexConfigBuilder() .with_metric(diskann::L2) .with_dimension(dim) @@ -210,12 +210,11 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con .is_use_opq(false) .with_num_pq_chunks(0) .is_pq_dist_build(false) - .with_search_threads(insert_threads) - .with_initial_search_list_size(L) .with_tag_type(diskann_type_to_name()) .with_label_type(diskann_type_to_name()) .with_data_type(diskann_type_to_name()) .with_index_write_params(params) + .with_index_search_params(index_search_params) .with_data_load_store_strategy(diskann::MEMORY) .build(); diff --git a/apps/utils/count_bfs_levels.cpp b/apps/utils/count_bfs_levels.cpp index ddc4eaf0b..1ec8225db 100644 --- a/apps/utils/count_bfs_levels.cpp +++ b/apps/utils/count_bfs_levels.cpp @@ -27,7 +27,7 @@ template void bfs_count(const std::string &index_path, uint32_t dat { using TagT = uint32_t; using LabelT = uint32_t; - diskann::Index index(diskann::Metric::L2, data_dims, 0, false, false); + diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false); std::cout << "Index class instantiated" << std::endl; index.load(index_path.c_str(), 1, 100); std::cout << "Index loaded" << std::endl; diff --git a/include/in_mem_data_store.h b/include/in_mem_data_store.h index 0509b3b82..9b6968b03 100644 --- a/include/in_mem_data_store.h +++ b/include/in_mem_data_store.h @@ 
-21,7 +21,7 @@ namespace diskann template class InMemDataStore : public AbstractDataStore { public: - InMemDataStore(const location_t capacity, const size_t dim, std::shared_ptr> distance_fn); + InMemDataStore(const location_t capacity, const size_t dim, std::unique_ptr> distance_fn); virtual ~InMemDataStore(); virtual location_t load(const std::string &filename) override; @@ -73,7 +73,7 @@ template class InMemDataStore : public AbstractDataStore> _distance_fn; + std::unique_ptr> _distance_fn; // in case we need to save vector norms for optimization std::shared_ptr _pre_computed_norms; diff --git a/include/index.h b/include/index.h index 0d9b6edb9..095d1599a 100644 --- a/include/index.h +++ b/include/index.h @@ -49,21 +49,16 @@ template clas **************************************************************************/ public: - // Constructor for Bulk operations and for creating the index object solely - // for loading a prexisting index. - DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points = 1, const bool dynamic_index = false, + // Call this when creating and passing Index Config is inconvenient. 
+ DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, + const size_t num_frozen_pts = 0, const bool dynamic_index = false, const bool enable_tags = false, const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, - const bool use_opq = false, const size_t num_frozen_pts = 0, - const bool init_data_store = true); - - // Constructor for incremental index - DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const IndexWriteParameters &indexParameters, const uint32_t initial_search_list_size, - const uint32_t search_threads, const bool enable_tags = false, - const bool concurrent_consolidate = false, const bool pq_dist_build = false, - const size_t num_pq_chunks = 0, const bool use_opq = false); + const bool use_opq = false); + // This is called by IndexFactory which returns AbstractIndex's simplified API DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store /* std::unique_ptr graph_store*/); @@ -329,7 +324,6 @@ template clas private: // Distance functions Metric _dist_metric = diskann::L2; - std::shared_ptr> _distance; // Data std::unique_ptr> _data_store; diff --git a/include/index_config.h b/include/index_config.h index b291c744d..2a8e0e8ba 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -33,24 +33,23 @@ struct IndexConfig std::string tag_type; std::string data_type; + // Params for building index std::shared_ptr index_write_params; - - uint32_t search_threads; - uint32_t initial_search_list_size; + // Params for searching index + std::shared_ptr index_search_params; private: IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, bool 
pq_dist_build, bool concurrent_consolidate, bool use_opq, const std::string &data_type, const std::string &tag_type, const std::string &label_type, - std::shared_ptr index_write_params, uint32_t search_threads, - uint32_t initial_search_list_size) + std::shared_ptr index_write_params, + std::shared_ptr index_search_params) : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build), concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type), data_type(data_type), - index_write_params(index_write_params), search_threads(search_threads), - initial_search_list_size(initial_search_list_size) + index_write_params(index_write_params), index_search_params(index_search_params) { } @@ -60,9 +59,7 @@ struct IndexConfig class IndexConfigBuilder { public: - IndexConfigBuilder() - { - } + IndexConfigBuilder() = default; IndexConfigBuilder &with_metric(Metric m) { @@ -160,15 +157,31 @@ class IndexConfigBuilder return *this; } - IndexConfigBuilder &with_search_threads(uint32_t search_threads) + IndexConfigBuilder &with_index_write_params(std::shared_ptr index_write_params_ptr) + { + if (index_write_params_ptr == nullptr) + { + diskann::cout << "Passed, empty build_params while creating index config" << std::endl; + return *this; + } + this->_index_write_params = index_write_params_ptr; + return *this; + } + + IndexConfigBuilder &with_index_search_params(IndexSearchParams &search_params) { - this->_search_threads = search_threads; + this->_index_search_params = std::make_shared(search_params); return *this; } - IndexConfigBuilder &with_initial_search_list_size(uint32_t search_list_size) + IndexConfigBuilder &with_index_search_params(std::shared_ptr search_params_ptr) { - this->_initial_search_list_size = 
search_list_size; + if (search_params_ptr == nullptr) + { + diskann::cout << "Passed, empty search_params while creating index config" << std::endl; + return *this; + } + this->_index_search_params = search_params_ptr; return *this; } @@ -177,19 +190,20 @@ class IndexConfigBuilder if (_data_type == "" || _data_type.empty()) throw ANNException("Error: data_type can not be empty", -1); - if (_dynamic_index && _index_write_params != nullptr) + if (_dynamic_index && _num_frozen_pts == 0) { - if (_search_threads == 0) - throw ANNException("Error: please pass search_threads for building dynamic index.", -1); + _num_frozen_pts = 1; + } - if (_initial_search_list_size == 0) + if (_dynamic_index) + { + if (_index_search_params != nullptr && _index_search_params->initial_search_list_size == 0) throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); } return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, - _use_opq, _data_type, _tag_type, _label_type, _index_write_params, _search_threads, - _initial_search_list_size); + _use_opq, _data_type, _tag_type, _label_type, _index_write_params, _index_search_params); } IndexConfigBuilder(const IndexConfigBuilder &) = delete; @@ -217,8 +231,6 @@ class IndexConfigBuilder std::string _data_type; std::shared_ptr _index_write_params; - - uint32_t _search_threads; - uint32_t _initial_search_list_size; + std::shared_ptr _index_search_params; }; } // namespace diskann diff --git a/include/index_factory.h b/include/index_factory.h index 3d1eb7992..7ad0893cc 100644 --- a/include/index_factory.h +++ b/include/index_factory.h @@ -10,13 +10,15 @@ class IndexFactory DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config); DISKANN_DLLEXPORT std::unique_ptr create_instance(); + // Construct a data store with distance function emplaced within + template + 
DISKANN_DLLEXPORT static std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, + size_t num_points, + size_t dimension, Metric m); + private: void check_config(); - template - std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, - size_t dimension); - std::unique_ptr construct_graphstore(GraphStoreStrategy stratagy, size_t size); template diff --git a/include/parameters.h b/include/parameters.h index 81a336da7..209b9128c 100644 --- a/include/parameters.h +++ b/include/parameters.h @@ -38,6 +38,17 @@ class IndexWriteParameters friend class IndexWriteParametersBuilder; }; +class IndexSearchParams +{ + public: + IndexSearchParams(const uint32_t initial_search_list_size, const uint32_t num_search_threads) + : initial_search_list_size(initial_search_list_size), num_search_threads(num_search_threads) + { + } + const uint32_t initial_search_list_size; // search L + const uint32_t num_search_threads; // search threads +}; + class IndexWriteParametersBuilder { /** diff --git a/python/include/static_disk_index.h b/python/include/static_disk_index.h index 71a1b5aff..4a399ab3e 100644 --- a/python/include/static_disk_index.h +++ b/python/include/static_disk_index.h @@ -6,7 +6,6 @@ #include #include - #include #include @@ -21,7 +20,8 @@ namespace py = pybind11; -namespace diskannpy { +namespace diskannpy +{ #ifdef _WINDOWS typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; @@ -29,8 +29,7 @@ typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader; #endif -template -class StaticDiskIndex +template class StaticDiskIndex { public: StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads, @@ -40,13 +39,15 @@ class StaticDiskIndex void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads); - NeighborsAndDistances search(py::array_t &query, uint64_t knn, - 
uint64_t complexity, uint64_t beam_width); + NeighborsAndDistances search(py::array_t &query, + uint64_t knn, uint64_t complexity, uint64_t beam_width); + + NeighborsAndDistances batch_search( + py::array_t &queries, uint64_t num_queries, uint64_t knn, + uint64_t complexity, uint64_t beam_width, uint32_t num_threads); - NeighborsAndDistances batch_search(py::array_t &queries, uint64_t num_queries, - uint64_t knn, uint64_t complexity, uint64_t beam_width, uint32_t num_threads); private: std::shared_ptr _reader; diskann::PQFlashIndex
_index; }; -} +} // namespace diskannpy diff --git a/python/src/builder.cpp b/python/src/builder.cpp index 4485d66e6..2e593e72b 100644 --- a/python/src/builder.cpp +++ b/python/src/builder.cpp @@ -44,10 +44,15 @@ void build_memory_index(const diskann::Metric metric, const std::string &vector_ .with_saturate_graph(false) .with_num_threads(num_threads) .build(); + diskann::IndexSearchParams index_search_params = + diskann::IndexSearchParams(index_build_params.search_list_size, num_threads); size_t data_num, data_dim; diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, use_tags, use_tags, false, use_pq_build, - num_pq_bytes, use_opq); + + diskann::Index index(metric, data_dim, data_num, + std::make_shared(index_build_params), + std::make_shared(index_search_params), 0, + use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); if (use_tags) { diff --git a/python/src/dynamic_memory_index.cpp b/python/src/dynamic_memory_index.cpp index af276b85f..f92f4157e 100644 --- a/python/src/dynamic_memory_index.cpp +++ b/python/src/dynamic_memory_index.cpp @@ -36,14 +36,15 @@ diskann::Index dynamic_index_builder(const diskann:: { const uint32_t _initial_search_threads = initial_search_threads != 0 ? initial_search_threads : omp_get_num_threads(); + + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, _initial_search_threads); return diskann::Index( m, dimensions, max_vectors, - true, // dynamic_index - write_params, // used for insert - initial_search_complexity, // used to prepare the scratch space for searching. can / may - // be expanded if the search asks for a larger L. 
- _initial_search_threads, // also used for the scratch space - true, // enable_tags + std::make_shared(write_params), // index write params + std::make_shared(index_search_params), // index_search_params + write_params.num_frozen_points, // frozen_points + true, // dynamic_index + true, // enable_tags concurrent_consolidation, false, // pq_dist_build 0, // num_pq_chunks diff --git a/python/src/static_memory_index.cpp b/python/src/static_memory_index.cpp index 3bd927174..0dbb24dc3 100644 --- a/python/src/static_memory_index.cpp +++ b/python/src/static_memory_index.cpp @@ -17,15 +17,17 @@ diskann::Index static_index_builder(const diskann::Me { throw std::runtime_error("initial_search_complexity must be a positive uint32_t"); } - + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, omp_get_num_threads()); return diskann::Index
(m, dimensions, num_points, - false, // not a dynamic_index - false, // no enable_tags/ids - false, // no concurrent_consolidate, - false, // pq_dist_build - 0, // num_pq_chunks - false, // use_opq = false - 0); // num_frozen_points + nullptr, // index write params + std::make_shared(index_search_params), // index search params + 0, // num frozen points + false, // not a dynamic_index + false, // no enable_tags/ids + false, // no concurrent_consolidate, + false, // pq_dist_build + 0, // num_pq_chunks + false); // use_opq = false } template diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 4ece797d1..6544df33a 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -635,8 +635,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr .with_num_threads(num_threads) .build(); using TagT = uint32_t; - diskann::Index _index(compareMetric, base_dim, base_num, false, false, false, - build_pq_bytes > 0, build_pq_bytes, use_opq); + diskann::Index _index( + compareMetric, base_dim, base_num, std::make_shared(paras), nullptr, + paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) _index.build(base_file.c_str(), base_num, paras); else @@ -696,8 +697,10 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); - diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, false, false, false, build_pq_bytes > 0, - build_pq_bytes, use_opq); + + diskann::Index _index( + compareMetric, shard_base_dim, shard_base_pts, std::make_shared(paras), + nullptr, paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) { _index.build(shard_base_file.c_str(), shard_base_pts, paras); diff --git a/src/filter_utils.cpp b/src/filter_utils.cpp index 965762d1f..618666488 100644 --- a/src/filter_utils.cpp +++ 
b/src/filter_utils.cpp @@ -45,7 +45,8 @@ void generate_label_indices(path input_data_path, path final_index_path_prefix, size_t number_of_label_points, dimension; diskann::get_bin_metadata(curr_label_input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, false, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, + false); auto index_build_timer = std::chrono::high_resolution_clock::now(); index.build(curr_label_input_data_path.c_str(), number_of_label_points, label_index_build_parameters); diff --git a/src/in_mem_data_store.cpp b/src/in_mem_data_store.cpp index f5f973917..7d02bba17 100644 --- a/src/in_mem_data_store.cpp +++ b/src/in_mem_data_store.cpp @@ -11,8 +11,8 @@ namespace diskann template InMemDataStore::InMemDataStore(const location_t num_points, const size_t dim, - std::shared_ptr> distance_fn) - : AbstractDataStore(num_points, dim), _distance_fn(distance_fn) + std::unique_ptr> distance_fn) + : AbstractDataStore(num_points, dim), _distance_fn(std::move(distance_fn)) { _aligned_dim = ROUND_UP(dim, _distance_fn->get_required_alignment()); alloc_aligned(((void **)&_data), this->_capacity * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); diff --git a/src/index.cpp b/src/index.cpp index eb7592a4e..eeb7169e1 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. 
+#include "index_factory.h" #include #include @@ -27,59 +28,38 @@ namespace diskann // Initialize an index with metric m, load the data of type T with filename // (bin), and initialize max_points template -Index::Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const IndexWriteParameters &indexParams, const uint32_t initial_search_list_size, - const uint32_t search_threads, const bool enable_tags, const bool concurrent_consolidate, - const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) - : Index(m, dim, max_points, dynamic_index, enable_tags, concurrent_consolidate, pq_dist_build, num_pq_chunks, - use_opq, indexParams.num_frozen_points) +Index::Index(const IndexConfig &index_config, std::unique_ptr> data_store) + : _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points), + _num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index), + _enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), + _pq_dist(index_config.pq_dist_build), _use_opq(index_config.use_opq), _num_pq_chunks(index_config.num_pq_chunks), + _delete_set(new tsl::robin_set), _conc_consolidate(index_config.concurrent_consolidate) { - if (dynamic_index) - { - this->enable_delete(); - } - _indexingQueueSize = indexParams.search_list_size; - _indexingRange = indexParams.max_degree; - _indexingMaxC = indexParams.max_occlusion_size; - _indexingAlpha = indexParams.alpha; - _filterIndexingQueueSize = indexParams.filter_list_size; - - uint32_t num_threads_indx = indexParams.num_threads; - uint32_t num_scratch_spaces = search_threads + num_threads_indx; - initialize_query_scratch(num_scratch_spaces, initial_search_list_size, _indexingQueueSize, _indexingRange, - _indexingMaxC, dim); -} - -template -Index::Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const bool enable_tags, const bool 
concurrent_consolidate, const bool pq_dist_build, - const size_t num_pq_chunks, const bool use_opq, const size_t num_frozen_pts, - const bool init_data_store) - : _dist_metric(m), _dim(dim), _max_points(max_points), _num_frozen_pts(num_frozen_pts), - _dynamic_index(dynamic_index), _enable_tags(enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), - _pq_dist(pq_dist_build), _use_opq(use_opq), _num_pq_chunks(num_pq_chunks), - _delete_set(new tsl::robin_set), _conc_consolidate(concurrent_consolidate) -{ - if (dynamic_index && !enable_tags) + if (_dynamic_index && !_enable_tags) { throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); } if (_pq_dist) { - if (dynamic_index) + if (_dynamic_index) throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " "index construction", -1, __FUNCSIG__, __FILE__, __LINE__); - if (m == diskann::Metric::INNER_PRODUCT) + if (_dist_metric == diskann::Metric::INNER_PRODUCT) throw ANNException("ERROR: Inner product metrics not yet supported " "with PQ distance " "base index", -1, __FUNCSIG__, __FILE__, __LINE__); } - if (dynamic_index && _num_frozen_pts == 0) + if (_dist_metric == diskann::Metric::COSINE && std::is_floating_point::value) + { + this->_normalize_vecs = true; + } + + if (_dynamic_index && _num_frozen_pts == 0) { _num_frozen_pts = 1; } @@ -90,7 +70,6 @@ Index::Index(Metric m, const size_t dim, const size_t max_point _max_points = 1; } const size_t total_internal_points = _max_points + _num_frozen_pts; - if (_pq_dist) { if (_num_pq_chunks > _dim) @@ -103,68 +82,63 @@ Index::Index(Metric m, const size_t dim, const size_t max_point _final_graph.resize(total_internal_points); - if (init_data_store) - { - // Issue #374: data_store is injected from index factory. Keeping this for backward compatibility. 
- // distance is owned by data_store - if (m == diskann::Metric::COSINE && std::is_floating_point::value) - { - // This is safe because T is float inside the if block. - this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - this->_normalize_vecs = true; - diskann::cout << "Normalizing vectors and using L2 for cosine " - "AVXNormalizedCosineDistanceFloat()." - << std::endl; - } - else - { - this->_distance.reset((Distance *)get_distance_function(m)); - } - // Note: moved this to factory, keeping this for backward compatibility. - _data_store = - std::make_unique>((location_t)total_internal_points, _dim, this->_distance); - } + _data_store = std::move(data_store); _locks = std::vector(total_internal_points); - - if (enable_tags) + if (_enable_tags) { _location_to_tag.reserve(total_internal_points); _tag_to_location.reserve(total_internal_points); } -} - -template -Index::Index(const IndexConfig &index_config, std::unique_ptr> data_store) - : Index(index_config.metric, index_config.dimension, index_config.max_points, index_config.dynamic_index, - index_config.enable_tags, index_config.concurrent_consolidate, index_config.pq_dist_build, - index_config.num_pq_chunks, index_config.use_opq, index_config.num_frozen_pts, false) -{ - - _data_store = std::move(data_store); - _distance.reset(_data_store->get_dist_fn()); - // enable delete by default for dynamic index if (_dynamic_index) { - this->enable_delete(); - } - if (_dynamic_index && index_config.index_write_params != nullptr) - { - _indexingQueueSize = index_config.index_write_params->search_list_size; - _indexingRange = index_config.index_write_params->max_degree; - _indexingMaxC = index_config.index_write_params->max_occlusion_size; - _indexingAlpha = index_config.index_write_params->alpha; - _filterIndexingQueueSize = index_config.index_write_params->filter_list_size; + this->enable_delete(); // enable delete by default for dynamic index + // if write params are not passed, it is inferred that 
ctor is called by search + if (index_config.index_write_params != nullptr && index_config.index_search_params != nullptr) + { + _indexingQueueSize = index_config.index_write_params->search_list_size; + _indexingRange = index_config.index_write_params->max_degree; + _indexingMaxC = index_config.index_write_params->max_occlusion_size; + _indexingAlpha = index_config.index_write_params->alpha; + _filterIndexingQueueSize = index_config.index_write_params->filter_list_size; - uint32_t num_threads_indx = index_config.index_write_params->num_threads; - uint32_t num_scratch_spaces = index_config.search_threads + num_threads_indx; + uint32_t num_threads_indx = index_config.index_write_params->num_threads; + uint32_t num_scratch_spaces = index_config.index_search_params->num_search_threads + num_threads_indx; - initialize_query_scratch(num_scratch_spaces, index_config.initial_search_list_size, _indexingQueueSize, - _indexingRange, _indexingMaxC, _data_store->get_dims()); + initialize_query_scratch(num_scratch_spaces, index_config.index_search_params->initial_search_list_size, + _indexingQueueSize, _indexingRange, _indexingMaxC, _data_store->get_dims()); + } } } +template +Index::Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, const size_t num_frozen_pts, + const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, + const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) + : Index(IndexConfigBuilder() + .with_metric(m) + .with_dimension(dim) + .with_max_points(max_points) + .with_index_write_params(index_parameters) + .with_index_search_params(index_search_params) + .with_num_frozen_pts(num_frozen_pts) + .is_dynamic_index(dynamic_index) + .is_enable_tags(enable_tags) + .is_concurrent_consolidate(concurrent_consolidate) + .is_pq_dist_build(pq_dist_build) + .with_num_pq_chunks(num_pq_chunks) + .is_use_opq(use_opq) + 
.with_data_type(diskann_type_to_name()) + .build(), + std::move(IndexFactory::construct_datastore( + diskann::MEMORY, max_points + (dynamic_index && num_frozen_pts == 0 ? (size_t)1 : num_frozen_pts), dim, + m))) +{ +} + template Index::~Index() { // Ensure that no other activity is happening before dtor() @@ -2164,7 +2138,8 @@ std::pair Index::search(const T *query, con std::shared_lock lock(_update_lock); - _distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, false, unused_filter_label, true); @@ -2266,7 +2241,7 @@ std::pair Index::search_with_filters(const // REFACTOR // T *aligned_query = scratch->aligned_query(); // memcpy(aligned_query, query, _dim * sizeof(T)); - _distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, true, filter_vec, true); auto best_L_nodes = scratch->best_l_nodes(); @@ -2345,7 +2320,8 @@ size_t Index::search_with_tags(const T *query, const uint64_t K const std::vector init_ids = get_init_ids(); const std::vector unused_filter_label; - _distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, false, unused_filter_label, true); NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); diff --git a/src/index_factory.cpp b/src/index_factory.cpp index c5607f4a0..88ac44a16 100644 --- a/src/index_factory.cpp 
+++ b/src/index_factory.cpp @@ -51,22 +51,21 @@ void IndexFactory::check_config() template std::unique_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, size_t num_points, - size_t dimension) + size_t dimension, Metric m) { - const size_t total_internal_points = num_points + _config->num_frozen_pts; - std::shared_ptr> distance; + std::unique_ptr> distance; switch (strategy) { case MEMORY: - if (_config->metric == diskann::Metric::COSINE && std::is_same::value) + if (m == diskann::Metric::COSINE && std::is_same::value) { distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - return std::make_unique>((location_t)total_internal_points, dimension, distance); + return std::make_unique>((location_t)num_points, dimension, std::move(distance)); } else { - distance.reset((Distance *)get_distance_function(_config->metric)); - return std::make_unique>((location_t)total_internal_points, dimension, distance); + distance.reset((Distance *)get_distance_function(m)); + return std::make_unique>((location_t)num_points, dimension, std::move(distance)); } break; default: @@ -83,10 +82,11 @@ std::unique_ptr IndexFactory::construct_graphstore(GraphStor template std::unique_ptr IndexFactory::create_instance() { - size_t num_points = _config->max_points; + size_t num_points = _config->max_points + _config->num_frozen_pts; size_t dim = _config->dimension; // auto graph_store = construct_graphstore(_config->graph_strategy, num_points); - auto data_store = construct_datastore(_config->data_strategy, num_points, dim); + auto data_store = + IndexFactory::construct_datastore(_config->data_strategy, num_points, dim, _config->metric); return std::make_unique>(*_config, std::move(data_store)); } @@ -147,4 +147,11 @@ std::unique_ptr IndexFactory::create_instance(const std::string & throw ANNException("Error: unsupported label_type please choose from [uint/ushort]", -1); } +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + 
DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); + } // namespace diskann diff --git a/src/restapi/search_wrapper.cpp b/src/restapi/search_wrapper.cpp index dc9f5734e..2cbefef3f 100644 --- a/src/restapi/search_wrapper.cpp +++ b/src/restapi/search_wrapper.cpp @@ -100,7 +100,10 @@ InMemorySearch::InMemorySearch(const std::string &baseFile, const std::string { size_t dimensions, total_points = 0; diskann::get_bin_metadata(baseFile, total_points, dimensions); - _index = std::unique_ptr>(new diskann::Index(m, dimensions, total_points, false)); + // Index's ctor expects std::shared_ptr<IndexSearchParams>; an integral search_l does not convert to it. + auto index_search_params = std::make_shared<diskann::IndexSearchParams>(search_l, num_threads); + _index = std::unique_ptr>( + new diskann::Index(m, dimensions, total_points, nullptr, index_search_params, 0, false)); _index->load(indexFile.c_str(), num_threads, search_l); }