From d53b3223c1621ada93875ca4b6fa4d651e64107c Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 12:19:22 -0400 Subject: [PATCH 01/24] initial commit --- apps/build_memory_index.cpp | 4 +- apps/build_stitched_index.cpp | 2 +- apps/utils/count_bfs_levels.cpp | 2 +- include/index.h | 17 +-- include/index_config.h | 4 +- src/disk_utils.cpp | 6 +- src/filter_utils.cpp | 2 +- src/index.cpp | 185 +++++++++++++++++++------------- 8 files changed, 127 insertions(+), 95 deletions(-) diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index 92b269f4f..26b640368 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -40,8 +40,8 @@ int build_in_memory_index(const diskann::Metric &metric, const std::string &data size_t data_num, data_dim; diskann::get_bin_metadata(data_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, false, false, false, use_pq_build, num_pq_bytes, - use_opq); + diskann::Index index(metric, data_dim, data_num, nullptr, 0, 0, false, false, false, use_pq_build, + num_pq_bytes, use_opq); auto s = std::chrono::high_resolution_clock::now(); if (label_file == "") { diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index 80481f8b0..d5f2474f1 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -285,7 +285,7 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p auto pruning_index_timer = std::chrono::high_resolution_clock::now(); diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, false, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, 0, 0, false, false); // not searching this index, set search_l to 0 index.load(full_index_path_prefix.c_str(), num_threads, 1); diff --git a/apps/utils/count_bfs_levels.cpp b/apps/utils/count_bfs_levels.cpp index 
ddc4eaf0b..35b50dae0 100644 --- a/apps/utils/count_bfs_levels.cpp +++ b/apps/utils/count_bfs_levels.cpp @@ -27,7 +27,7 @@ template void bfs_count(const std::string &index_path, uint32_t dat { using TagT = uint32_t; using LabelT = uint32_t; - diskann::Index index(diskann::Metric::L2, data_dims, 0, false, false); + diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, 0, 0, false, false); std::cout << "Index class instantiated" << std::endl; index.load(index_path.c_str(), 1, 100); std::cout << "Index loaded" << std::endl; diff --git a/include/index.h b/include/index.h index 0d9b6edb9..8b7f6c1ec 100644 --- a/include/index.h +++ b/include/index.h @@ -49,18 +49,11 @@ template clas **************************************************************************/ public: - // Constructor for Bulk operations and for creating the index object solely - // for loading a prexisting index. - DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points = 1, const bool dynamic_index = false, - const bool enable_tags = false, const bool concurrent_consolidate = false, - const bool pq_dist_build = false, const size_t num_pq_chunks = 0, - const bool use_opq = false, const size_t num_frozen_pts = 0, - const bool init_data_store = true); - - // Constructor for incremental index - DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const IndexWriteParameters &indexParameters, const uint32_t initial_search_list_size, - const uint32_t search_threads, const bool enable_tags = false, + // For internal use - uses new constructor internally + DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr &indexParameters, + const uint32_t initial_search_list_size, const size_t num_frozen_pts = 0, + const bool dynamic_index = false, const bool enable_tags = false, const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, const bool 
use_opq = false); diff --git a/include/index_config.h b/include/index_config.h index b291c744d..b6334e650 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -60,9 +60,7 @@ struct IndexConfig class IndexConfigBuilder { public: - IndexConfigBuilder() - { - } + IndexConfigBuilder() = default; IndexConfigBuilder &with_metric(Metric m) { diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index a0e4c25ed..f0a88671d 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -635,7 +635,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr .with_num_threads(num_threads) .build(); using TagT = uint32_t; - diskann::Index _index(compareMetric, base_dim, base_num, false, false, false, + diskann::Index _index(compareMetric, base_dim, base_num, nullptr, 0, 0, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) _index.build(base_file.c_str(), base_num, paras); @@ -696,8 +696,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); - diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, false, false, false, build_pq_bytes > 0, - build_pq_bytes, use_opq); + diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, nullptr, 0, 0, false, false, false, + build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) { _index.build(shard_base_file.c_str(), shard_base_pts, paras); diff --git a/src/filter_utils.cpp b/src/filter_utils.cpp index 965762d1f..6887c6e54 100644 --- a/src/filter_utils.cpp +++ b/src/filter_utils.cpp @@ -45,7 +45,7 @@ void generate_label_indices(path input_data_path, path final_index_path_prefix, size_t number_of_label_points, dimension; diskann::get_bin_metadata(curr_label_input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, false, false); + 
diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, 0, 0, false, false); auto index_build_timer = std::chrono::high_resolution_clock::now(); index.build(curr_label_input_data_path.c_str(), number_of_label_points, label_index_build_parameters); diff --git a/src/index.cpp b/src/index.cpp index eb7592a4e..535a4f6af 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -27,59 +27,116 @@ namespace diskann // Initialize an index with metric m, load the data of type T with filename // (bin), and initialize max_points template -Index::Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const IndexWriteParameters &indexParams, const uint32_t initial_search_list_size, - const uint32_t search_threads, const bool enable_tags, const bool concurrent_consolidate, - const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) - : Index(m, dim, max_points, dynamic_index, enable_tags, concurrent_consolidate, pq_dist_build, num_pq_chunks, - use_opq, indexParams.num_frozen_points) +Index::Index(const IndexConfig &index_config, std::unique_ptr> data_store) + : _dist_metric(index_config.metric), _dim(index_config.dimension), _max_points(index_config.max_points), + _num_frozen_pts(index_config.num_frozen_pts), _dynamic_index(index_config.dynamic_index), + _enable_tags(index_config.enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), + _pq_dist(index_config.pq_dist_build), _use_opq(index_config.use_opq), _num_pq_chunks(index_config.num_pq_chunks), + _delete_set(new tsl::robin_set), _conc_consolidate(index_config.concurrent_consolidate) { - if (dynamic_index) + + if (_dynamic_index && !_enable_tags) + { + throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_pq_dist) + { + if (_dynamic_index) + throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " + "index construction", + -1, __FUNCSIG__, __FILE__, 
__LINE__); + if (_dist_metric == diskann::Metric::INNER_PRODUCT) + throw ANNException("ERROR: Inner product metrics not yet supported " + "with PQ distance " + "base index", + -1, __FUNCSIG__, __FILE__, __LINE__); + } + + if (_dynamic_index && _num_frozen_pts == 0) + { + _num_frozen_pts = 1; + } + // Sanity check. While logically it is correct, max_points = 0 causes + // downstream problems. + if (_max_points == 0) + { + _max_points = 1; + } + const size_t total_internal_points = _max_points + _num_frozen_pts; + if (_pq_dist) + { + if (_num_pq_chunks > _dim) + throw diskann::ANNException("ERROR: num_pq_chunks > dim", -1, __FUNCSIG__, __FILE__, __LINE__); + alloc_aligned(((void **)&_pq_data), total_internal_points * _num_pq_chunks * sizeof(char), 8 * sizeof(char)); + std::memset(_pq_data, 0, total_internal_points * _num_pq_chunks * sizeof(char)); + } + + _start = (uint32_t)_max_points; + + _final_graph.resize(total_internal_points); + + _data_store = std::move(data_store); + _distance.reset(_data_store->get_dist_fn()); + + _locks = std::vector(total_internal_points); + if (_enable_tags) { - this->enable_delete(); + _location_to_tag.reserve(total_internal_points); + _tag_to_location.reserve(total_internal_points); } - _indexingQueueSize = indexParams.search_list_size; - _indexingRange = indexParams.max_degree; - _indexingMaxC = indexParams.max_occlusion_size; - _indexingAlpha = indexParams.alpha; - _filterIndexingQueueSize = indexParams.filter_list_size; - uint32_t num_threads_indx = indexParams.num_threads; - uint32_t num_scratch_spaces = search_threads + num_threads_indx; + if (_dynamic_index) + { + this->enable_delete(); // enable delete by default for dynamic index + // if write params are not passed, it is inffered that ctor is called by search + if (index_config.index_write_params != nullptr) + { + _indexingQueueSize = index_config.index_write_params->search_list_size; + _indexingRange = index_config.index_write_params->max_degree; + _indexingMaxC = 
index_config.index_write_params->max_occlusion_size; + _indexingAlpha = index_config.index_write_params->alpha; + _filterIndexingQueueSize = index_config.index_write_params->filter_list_size; + + uint32_t num_threads_indx = index_config.index_write_params->num_threads; + uint32_t num_scratch_spaces = index_config.search_threads + num_threads_indx; - initialize_query_scratch(num_scratch_spaces, initial_search_list_size, _indexingQueueSize, _indexingRange, - _indexingMaxC, dim); + initialize_query_scratch(num_scratch_spaces, index_config.initial_search_list_size, _indexingQueueSize, + _indexingRange, _indexingMaxC, _data_store->get_dims()); + } + } } template -Index::Index(Metric m, const size_t dim, const size_t max_points, const bool dynamic_index, - const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, - const size_t num_pq_chunks, const bool use_opq, const size_t num_frozen_pts, - const bool init_data_store) +Index::Index(Metric m, const size_t dim, const size_t max_points, + const std::shared_ptr &indexParameters, + const uint32_t initial_search_list_size, const size_t num_frozen_pts, + const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, + const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) : _dist_metric(m), _dim(dim), _max_points(max_points), _num_frozen_pts(num_frozen_pts), _dynamic_index(dynamic_index), _enable_tags(enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), _pq_dist(pq_dist_build), _use_opq(use_opq), _num_pq_chunks(num_pq_chunks), _delete_set(new tsl::robin_set), _conc_consolidate(concurrent_consolidate) { - if (dynamic_index && !enable_tags) + if (_dynamic_index && !_enable_tags) { throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); } if (_pq_dist) { - if (dynamic_index) + if (_dynamic_index) throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " "index 
construction", -1, __FUNCSIG__, __FILE__, __LINE__); - if (m == diskann::Metric::INNER_PRODUCT) + if (_dist_metric == diskann::Metric::INNER_PRODUCT) throw ANNException("ERROR: Inner product metrics not yet supported " "with PQ distance " "base index", -1, __FUNCSIG__, __FILE__, __LINE__); } - if (dynamic_index && _num_frozen_pts == 0) + if (_dynamic_index && _num_frozen_pts == 0) { _num_frozen_pts = 1; } @@ -90,7 +147,6 @@ Index::Index(Metric m, const size_t dim, const size_t max_point _max_points = 1; } const size_t total_internal_points = _max_points + _num_frozen_pts; - if (_pq_dist) { if (_num_pq_chunks > _dim) @@ -103,65 +159,50 @@ Index::Index(Metric m, const size_t dim, const size_t max_point _final_graph.resize(total_internal_points); - if (init_data_store) + // Issue #374: data_store is injected from index factory. Keeping this for backward compatibility. + // distance is owned by data_store + if (m == diskann::Metric::COSINE && std::is_floating_point::value) { - // Issue #374: data_store is injected from index factory. Keeping this for backward compatibility. - // distance is owned by data_store - if (m == diskann::Metric::COSINE && std::is_floating_point::value) - { - // This is safe because T is float inside the if block. - this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - this->_normalize_vecs = true; - diskann::cout << "Normalizing vectors and using L2 for cosine " - "AVXNormalizedCosineDistanceFloat()." - << std::endl; - } - else - { - this->_distance.reset((Distance *)get_distance_function(m)); - } - // Note: moved this to factory, keeping this for backward compatibility. - _data_store = - std::make_unique>((location_t)total_internal_points, _dim, this->_distance); + // This is safe because T is float inside the if block. 
+ this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); + this->_normalize_vecs = true; + diskann::cout << "Normalizing vectors and using L2 for cosine " + "AVXNormalizedCosineDistanceFloat()." + << std::endl; + } + else + { + this->_distance.reset((Distance *)get_distance_function(m)); } + // Note: moved this to factory, keeping this for backward compatibility. + _data_store = + std::make_unique>((location_t)total_internal_points, _dim, this->_distance); _locks = std::vector(total_internal_points); - - if (enable_tags) + if (_enable_tags) { _location_to_tag.reserve(total_internal_points); _tag_to_location.reserve(total_internal_points); } -} -template -Index::Index(const IndexConfig &index_config, std::unique_ptr> data_store) - : Index(index_config.metric, index_config.dimension, index_config.max_points, index_config.dynamic_index, - index_config.enable_tags, index_config.concurrent_consolidate, index_config.pq_dist_build, - index_config.num_pq_chunks, index_config.use_opq, index_config.num_frozen_pts, false) -{ - - _data_store = std::move(data_store); - _distance.reset(_data_store->get_dist_fn()); - - // enable delete by default for dynamic index if (_dynamic_index) { - this->enable_delete(); - } - if (_dynamic_index && index_config.index_write_params != nullptr) - { - _indexingQueueSize = index_config.index_write_params->search_list_size; - _indexingRange = index_config.index_write_params->max_degree; - _indexingMaxC = index_config.index_write_params->max_occlusion_size; - _indexingAlpha = index_config.index_write_params->alpha; - _filterIndexingQueueSize = index_config.index_write_params->filter_list_size; + this->enable_delete(); // enable delete by default for dynamic index + // if write params are not passed, it is inffered that ctor is called by search + if (indexParameters != nullptr) + { + _indexingQueueSize = indexParameters->search_list_size; + _indexingRange = indexParameters->max_degree; + _indexingMaxC = 
indexParameters->max_occlusion_size; + _indexingAlpha = indexParameters->alpha; + _filterIndexingQueueSize = indexParameters->filter_list_size; - uint32_t num_threads_indx = index_config.index_write_params->num_threads; - uint32_t num_scratch_spaces = index_config.search_threads + num_threads_indx; + uint32_t num_threads_indx = indexParameters->num_threads; + uint32_t num_scratch_spaces = indexParameters->num_threads + num_threads_indx; - initialize_query_scratch(num_scratch_spaces, index_config.initial_search_list_size, _indexingQueueSize, - _indexingRange, _indexingMaxC, _data_store->get_dims()); + initialize_query_scratch(num_scratch_spaces, initial_search_list_size, _indexingQueueSize, _indexingRange, + _indexingMaxC, _data_store->get_dims()); + } } } From 6df72ba0e966631a607c0c0c63616e35fc61697f Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 12:55:03 -0400 Subject: [PATCH 02/24] updating python bindings to use new ctor --- python/src/builder.cpp | 5 +++-- python/src/dynamic_memory_index.cpp | 11 +++++------ python/src/static_memory_index.cpp | 16 +++++++++------- src/disk_utils.cpp | 8 +++++--- src/restapi/search_wrapper.cpp | 3 ++- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/src/builder.cpp b/python/src/builder.cpp index 4485d66e6..53b69d47a 100644 --- a/python/src/builder.cpp +++ b/python/src/builder.cpp @@ -46,8 +46,9 @@ void build_memory_index(const diskann::Metric metric, const std::string &vector_ .build(); size_t data_num, data_dim; diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, use_tags, use_tags, false, use_pq_build, - num_pq_bytes, use_opq); + diskann::Index index(metric, data_dim, data_num, + std::make_shared(index_build_params), 0, 0, + use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); if (use_tags) { diff --git a/python/src/dynamic_memory_index.cpp b/python/src/dynamic_memory_index.cpp index af276b85f..3ec9a5f1a 100644 
--- a/python/src/dynamic_memory_index.cpp +++ b/python/src/dynamic_memory_index.cpp @@ -38,12 +38,11 @@ diskann::Index dynamic_index_builder(const diskann:: initial_search_threads != 0 ? initial_search_threads : omp_get_num_threads(); return diskann::Index( m, dimensions, max_vectors, - true, // dynamic_index - write_params, // used for insert - initial_search_complexity, // used to prepare the scratch space for searching. can / may - // be expanded if the search asks for a larger L. - _initial_search_threads, // also used for the scratch space - true, // enable_tags + std::make_shared(paras), // index write params + initial_search_complexity, // initial_search_list_size + 0, // frozen_points + true, // dynamic_index + true, // enable_tags concurrent_consolidation, false, // pq_dist_build 0, // num_pq_chunks diff --git a/python/src/static_memory_index.cpp b/python/src/static_memory_index.cpp index 3bd927174..50296fa73 100644 --- a/python/src/static_memory_index.cpp +++ b/python/src/static_memory_index.cpp @@ -19,13 +19,15 @@ diskann::Index static_index_builder(const diskann::Me } return diskann::Index
(m, dimensions, num_points, - false, // not a dynamic_index - false, // no enable_tags/ids - false, // no concurrent_consolidate, - false, // pq_dist_build - 0, // num_pq_chunks - false, // use_opq = false - 0); // num_frozen_points + nullptr, // index write params + initial_search_complexity, + 0, // num frozen points + false, // not a dynamic_index + false, // no enable_tags/ids + false, // no concurrent_consolidate, + false, // pq_dist_build + 0, // num_pq_chunks + false); // use_opq = false } template diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index f0a88671d..019ad3f9d 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -635,8 +635,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr .with_num_threads(num_threads) .build(); using TagT = uint32_t; - diskann::Index _index(compareMetric, base_dim, base_num, nullptr, 0, 0, false, false, false, - build_pq_bytes > 0, build_pq_bytes, use_opq); + diskann::Index _index(compareMetric, base_dim, base_num, + std::make_shared(paras), 0, 0, false, + false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) _index.build(base_file.c_str(), base_num, paras); else @@ -696,7 +697,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); - diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, nullptr, 0, 0, false, false, false, + diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, + std::make_shared(paras), 0, 0, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) { diff --git a/src/restapi/search_wrapper.cpp b/src/restapi/search_wrapper.cpp index dc9f5734e..75d1f272f 100644 --- a/src/restapi/search_wrapper.cpp +++ b/src/restapi/search_wrapper.cpp @@ -100,7 +100,8 @@ InMemorySearch::InMemorySearch(const std::string &baseFile, const std::string { size_t 
dimensions, total_points = 0; diskann::get_bin_metadata(baseFile, total_points, dimensions); - _index = std::unique_ptr>(new diskann::Index(m, dimensions, total_points, false)); + _index = + std::unique_ptr>(new diskann::Index(m, dimensions, total_points, nullptr, 0, 0, false)); _index->load(indexFile.c_str(), num_threads, search_l); } From c7a382aea197ef9b1141bfc9651af24a8b6b19e1 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 13:08:12 -0400 Subject: [PATCH 03/24] python binding error fix --- python/src/dynamic_memory_index.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/src/dynamic_memory_index.cpp b/python/src/dynamic_memory_index.cpp index 3ec9a5f1a..42fab13e0 100644 --- a/python/src/dynamic_memory_index.cpp +++ b/python/src/dynamic_memory_index.cpp @@ -38,11 +38,11 @@ diskann::Index dynamic_index_builder(const diskann:: initial_search_threads != 0 ? initial_search_threads : omp_get_num_threads(); return diskann::Index( m, dimensions, max_vectors, - std::make_shared(paras), // index write params - initial_search_complexity, // initial_search_list_size - 0, // frozen_points - true, // dynamic_index - true, // enable_tags + std::make_shared(write_params), // index write params + initial_search_complexity, // initial_search_list_size + 0, // frozen_points + true, // dynamic_index + true, // enable_tags concurrent_consolidation, false, // pq_dist_build 0, // num_pq_chunks From f4d256f7ec93b6c97237c0410af059d148fa2c45 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 14:14:13 -0400 Subject: [PATCH 04/24] error fix --- python/src/builder.cpp | 5 ++--- python/src/dynamic_memory_index.cpp | 2 +- src/disk_utils.cpp | 10 +++++----- src/restapi/search_wrapper.cpp | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/python/src/builder.cpp b/python/src/builder.cpp index 53b69d47a..2cee7de8e 100644 --- a/python/src/builder.cpp +++ b/python/src/builder.cpp @@ -46,9 +46,8 @@ void 
build_memory_index(const diskann::Metric metric, const std::string &vector_ .build(); size_t data_num, data_dim; diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, - std::make_shared(index_build_params), 0, 0, - use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); + diskann::Index index(metric, data_dim, data_num, nullptr, 0, 0, use_tags, use_tags, false, + use_pq_build, num_pq_bytes, use_opq); if (use_tags) { diff --git a/python/src/dynamic_memory_index.cpp b/python/src/dynamic_memory_index.cpp index 42fab13e0..ca1a62dad 100644 --- a/python/src/dynamic_memory_index.cpp +++ b/python/src/dynamic_memory_index.cpp @@ -40,7 +40,7 @@ diskann::Index dynamic_index_builder(const diskann:: m, dimensions, max_vectors, std::make_shared(write_params), // index write params initial_search_complexity, // initial_search_list_size - 0, // frozen_points + write_params.num_frozen_points, // frozen_points true, // dynamic_index true, // enable_tags concurrent_consolidation, diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 019ad3f9d..7d63846c0 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -635,9 +635,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr .with_num_threads(num_threads) .build(); using TagT = uint32_t; - diskann::Index _index(compareMetric, base_dim, base_num, - std::make_shared(paras), 0, 0, false, - false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); + diskann::Index _index( + compareMetric, base_dim, base_num, std::make_shared(paras), 0, + paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) _index.build(base_file.c_str(), base_num, paras); else @@ -698,8 +698,8 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); diskann::Index 
_index(compareMetric, shard_base_dim, shard_base_pts, - std::make_shared(paras), 0, 0, false, false, false, - build_pq_bytes > 0, build_pq_bytes, use_opq); + std::make_shared(paras), 0, paras.num_frozen_points, + false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) { _index.build(shard_base_file.c_str(), shard_base_pts, paras); diff --git a/src/restapi/search_wrapper.cpp b/src/restapi/search_wrapper.cpp index 75d1f272f..2cbefef3f 100644 --- a/src/restapi/search_wrapper.cpp +++ b/src/restapi/search_wrapper.cpp @@ -100,8 +100,8 @@ InMemorySearch::InMemorySearch(const std::string &baseFile, const std::string { size_t dimensions, total_points = 0; diskann::get_bin_metadata(baseFile, total_points, dimensions); - _index = - std::unique_ptr>(new diskann::Index(m, dimensions, total_points, nullptr, 0, 0, false)); + _index = std::unique_ptr>( + new diskann::Index(m, dimensions, total_points, nullptr, search_l, 0, false)); _index->load(indexFile.c_str(), num_threads, search_l); } From 75d1680a448fea0c3bd6d794b454f79773247f6b Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 14:31:10 -0400 Subject: [PATCH 05/24] reverting some changes -> experiment --- python/src/builder.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/src/builder.cpp b/python/src/builder.cpp index 2cee7de8e..1ec81f01c 100644 --- a/python/src/builder.cpp +++ b/python/src/builder.cpp @@ -46,8 +46,9 @@ void build_memory_index(const diskann::Metric metric, const std::string &vector_ .build(); size_t data_num, data_dim; diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, nullptr, 0, 0, use_tags, use_tags, false, - use_pq_build, num_pq_bytes, use_opq); + diskann::Index index( + metric, data_dim, data_num, std::make_shared(index_build_params), + index_build_params.search_list_size, 0, use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); if (use_tags) { From 
b3413e3281042f546b223f4650ae89087e690eaa Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 17:11:21 -0400 Subject: [PATCH 06/24] removing redundant code from native index --- include/index_config.h | 10 ++++ src/index.cpp | 115 +++++++++-------------------------------- 2 files changed, 35 insertions(+), 90 deletions(-) diff --git a/include/index_config.h b/include/index_config.h index b6334e650..9141615b6 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -158,6 +158,16 @@ class IndexConfigBuilder return *this; } + IndexConfigBuilder &with_index_write_params(const std::shared_ptr &index_write_params_ptr) + { + if (index_write_params_ptr == nullptr) + { + return *this; + } + this->_index_write_params = index_write_params_ptr; + return *this; + } + IndexConfigBuilder &with_search_threads(uint32_t search_threads) { this->_search_threads = search_threads; diff --git a/src/index.cpp b/src/index.cpp index 535a4f6af..24cff35b1 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -113,97 +113,32 @@ Index::Index(Metric m, const size_t dim, const size_t max_point const uint32_t initial_search_list_size, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) - : _dist_metric(m), _dim(dim), _max_points(max_points), _num_frozen_pts(num_frozen_pts), - _dynamic_index(dynamic_index), _enable_tags(enable_tags), _indexingMaxC(DEFAULT_MAXC), _query_scratch(nullptr), - _pq_dist(pq_dist_build), _use_opq(use_opq), _num_pq_chunks(num_pq_chunks), - _delete_set(new tsl::robin_set), _conc_consolidate(concurrent_consolidate) + : Index(IndexConfigBuilder() + .with_metric(m) + .with_dimension(dim) + .with_max_points(max_points) + .with_index_write_params(indexParameters) + .with_initial_search_list_size(initial_search_list_size) + .with_num_frozen_pts(num_frozen_pts) + .is_dynamic_index(dynamic_index) + .is_enable_tags(enable_tags) + 
.is_concurrent_consolidate(concurrent_consolidate) + .is_pq_dist_build(pq_dist_build) + .with_num_pq_chunks(num_pq_chunks) + .is_use_opq(use_opq) + .with_data_type(diskann_type_to_name()) + .build(), + std::make_unique>( + (location_t)(max_points + num_frozen_pts), dim, [m] { // lambda to get distance + std::shared_ptr> distance; + if (m == diskann::Metric::COSINE && std::is_same::value) + { + distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); + } + distance.reset((Distance *)get_distance_function(m)); + return distance; + }())) { - if (_dynamic_index && !_enable_tags) - { - throw ANNException("ERROR: Dynamic Indexing must have tags enabled.", -1, __FUNCSIG__, __FILE__, __LINE__); - } - - if (_pq_dist) - { - if (_dynamic_index) - throw ANNException("ERROR: Dynamic Indexing not supported with PQ distance based " - "index construction", - -1, __FUNCSIG__, __FILE__, __LINE__); - if (_dist_metric == diskann::Metric::INNER_PRODUCT) - throw ANNException("ERROR: Inner product metrics not yet supported " - "with PQ distance " - "base index", - -1, __FUNCSIG__, __FILE__, __LINE__); - } - - if (_dynamic_index && _num_frozen_pts == 0) - { - _num_frozen_pts = 1; - } - // Sanity check. While logically it is correct, max_points = 0 causes - // downstream problems. - if (_max_points == 0) - { - _max_points = 1; - } - const size_t total_internal_points = _max_points + _num_frozen_pts; - if (_pq_dist) - { - if (_num_pq_chunks > _dim) - throw diskann::ANNException("ERROR: num_pq_chunks > dim", -1, __FUNCSIG__, __FILE__, __LINE__); - alloc_aligned(((void **)&_pq_data), total_internal_points * _num_pq_chunks * sizeof(char), 8 * sizeof(char)); - std::memset(_pq_data, 0, total_internal_points * _num_pq_chunks * sizeof(char)); - } - - _start = (uint32_t)_max_points; - - _final_graph.resize(total_internal_points); - - // Issue #374: data_store is injected from index factory. Keeping this for backward compatibility. 
- // distance is owned by data_store - if (m == diskann::Metric::COSINE && std::is_floating_point::value) - { - // This is safe because T is float inside the if block. - this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - this->_normalize_vecs = true; - diskann::cout << "Normalizing vectors and using L2 for cosine " - "AVXNormalizedCosineDistanceFloat()." - << std::endl; - } - else - { - this->_distance.reset((Distance *)get_distance_function(m)); - } - // Note: moved this to factory, keeping this for backward compatibility. - _data_store = - std::make_unique>((location_t)total_internal_points, _dim, this->_distance); - - _locks = std::vector(total_internal_points); - if (_enable_tags) - { - _location_to_tag.reserve(total_internal_points); - _tag_to_location.reserve(total_internal_points); - } - - if (_dynamic_index) - { - this->enable_delete(); // enable delete by default for dynamic index - // if write params are not passed, it is inffered that ctor is called by search - if (indexParameters != nullptr) - { - _indexingQueueSize = indexParameters->search_list_size; - _indexingRange = indexParameters->max_degree; - _indexingMaxC = indexParameters->max_occlusion_size; - _indexingAlpha = indexParameters->alpha; - _filterIndexingQueueSize = indexParameters->filter_list_size; - - uint32_t num_threads_indx = indexParameters->num_threads; - uint32_t num_scratch_spaces = indexParameters->num_threads + num_threads_indx; - - initialize_query_scratch(num_scratch_spaces, initial_search_list_size, _indexingQueueSize, _indexingRange, - _indexingMaxC, _data_store->get_dims()); - } - } } template Index::~Index() From 00ea6572cde11e88f0d0478d2c74f3d90690ba6a Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Thu, 10 Aug 2023 17:35:31 -0400 Subject: [PATCH 07/24] python build error fix --- src/index.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index.cpp b/src/index.cpp index 24cff35b1..b7c518ad3 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ 
-127,6 +127,7 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_num_pq_chunks(num_pq_chunks) .is_use_opq(use_opq) .with_data_type(diskann_type_to_name()) + .with_search_threads(indexParameters != nullptr ? indexParameters->num_threads : 0) .build(), std::make_unique>( (location_t)(max_points + num_frozen_pts), dim, [m] { // lambda to get distance From 88080290429cc4abb748ff7f7ec2e158dc9086e6 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 09:35:01 -0400 Subject: [PATCH 08/24] trying to resolve python build error --- include/index_factory.h | 8 ++++---- src/index.cpp | 12 ++---------- src/index_factory.cpp | 15 +++++++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/include/index_factory.h b/include/index_factory.h index 3d1eb7992..09f2ac441 100644 --- a/include/index_factory.h +++ b/include/index_factory.h @@ -10,13 +10,13 @@ class IndexFactory DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config); DISKANN_DLLEXPORT std::unique_ptr create_instance(); + template + static std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, + size_t dimension, Metric m); + private: void check_config(); - template - std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, - size_t dimension); - std::unique_ptr construct_graphstore(GraphStoreStrategy stratagy, size_t size); template diff --git a/src/index.cpp b/src/index.cpp index b7c518ad3..1b27187b2 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. +#include "index_factory.h" #include #include @@ -129,16 +130,7 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_data_type(diskann_type_to_name()) .with_search_threads(indexParameters != nullptr ? 
indexParameters->num_threads : 0) .build(), - std::make_unique>( - (location_t)(max_points + num_frozen_pts), dim, [m] { // lambda to get distance - std::shared_ptr> distance; - if (m == diskann::Metric::COSINE && std::is_same::value) - { - distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - } - distance.reset((Distance *)get_distance_function(m)); - return distance; - }())) + std::move(IndexFactory::construct_datastore(diskann::MEMORY, max_points + num_frozen_pts, dim, m))) { } diff --git a/src/index_factory.cpp b/src/index_factory.cpp index c5607f4a0..43b303422 100644 --- a/src/index_factory.cpp +++ b/src/index_factory.cpp @@ -51,22 +51,21 @@ void IndexFactory::check_config() template std::unique_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, size_t num_points, - size_t dimension) + size_t dimension, Metric m) { - const size_t total_internal_points = num_points + _config->num_frozen_pts; std::shared_ptr> distance; switch (strategy) { case MEMORY: - if (_config->metric == diskann::Metric::COSINE && std::is_same::value) + if (m == diskann::Metric::COSINE && std::is_same::value) { distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - return std::make_unique>((location_t)total_internal_points, dimension, distance); + return std::make_unique>((location_t)num_points, dimension, distance); } else { - distance.reset((Distance *)get_distance_function(_config->metric)); - return std::make_unique>((location_t)total_internal_points, dimension, distance); + distance.reset((Distance *)get_distance_function(m)); + return std::make_unique>((location_t)num_points, dimension, distance); } break; default: @@ -83,10 +82,10 @@ std::unique_ptr IndexFactory::construct_graphstore(GraphStor template std::unique_ptr IndexFactory::create_instance() { - size_t num_points = _config->max_points; + size_t num_points = _config->max_points + _config->num_frozen_pts; size_t dim = _config->dimension; // auto graph_store = 
construct_graphstore(_config->graph_strategy, num_points); - auto data_store = construct_datastore(_config->data_strategy, num_points, dim); + auto data_store = construct_datastore(_config->data_strategy, num_points, dim, _config->metric); return std::make_unique>(*_config, std::move(data_store)); } From c3e064f0e3f5c698258b47c61b83ec0a93177c65 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 10:03:25 -0400 Subject: [PATCH 09/24] attempt at python build fix --- include/index_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/index_config.h b/include/index_config.h index 9141615b6..7b37c12d1 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -158,7 +158,7 @@ class IndexConfigBuilder return *this; } - IndexConfigBuilder &with_index_write_params(const std::shared_ptr &index_write_params_ptr) + IndexConfigBuilder &with_index_write_params(std::shared_ptr index_write_params_ptr) { if (index_write_params_ptr == nullptr) { From f4d4a3b1e53fdf310c7998ce47b24a651a3f2473 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 11:34:17 -0400 Subject: [PATCH 10/24] adding IndexSearchParams --- apps/build_memory_index.cpp | 5 ++-- apps/build_stitched_index.cpp | 2 +- apps/test_insert_deletes_consolidate.cpp | 4 +-- apps/test_streaming_scenario.cpp | 5 ++-- apps/utils/count_bfs_levels.cpp | 2 +- include/index.h | 9 +++--- include/index_config.h | 38 ++++++++++++------------ include/parameters.h | 11 +++++++ python/include/static_disk_index.h | 19 ++++++------ python/src/builder.cpp | 10 +++++-- python/src/dynamic_memory_index.cpp | 12 ++++---- python/src/static_memory_index.cpp | 12 ++++---- src/disk_utils.cpp | 8 ++--- src/filter_utils.cpp | 3 +- src/index.cpp | 18 ++++++----- 15 files changed, 91 insertions(+), 67 deletions(-) diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index 26b640368..a5bbe498c 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ 
-40,8 +40,9 @@ int build_in_memory_index(const diskann::Metric &metric, const std::string &data size_t data_num, data_dim; diskann::get_bin_metadata(data_path, data_num, data_dim); - diskann::Index index(metric, data_dim, data_num, nullptr, 0, 0, false, false, false, use_pq_build, - num_pq_bytes, use_opq); + diskann::Index index(metric, data_dim, data_num, + std::make_shared(paras), nullptr, 0, false, + false, false, use_pq_build, num_pq_bytes, use_opq); auto s = std::chrono::high_resolution_clock::now(); if (label_file == "") { diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index d5f2474f1..069651781 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -285,7 +285,7 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p auto pruning_index_timer = std::chrono::high_resolution_clock::now(); diskann::get_bin_metadata(input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, 0, 0, false, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, false); // not searching this index, set search_l to 0 index.load(full_index_path_prefix.c_str(), num_threads, 1); diff --git a/apps/test_insert_deletes_consolidate.cpp b/apps/test_insert_deletes_consolidate.cpp index 700f4d7b6..8999688ea 100644 --- a/apps/test_insert_deletes_consolidate.cpp +++ b/apps/test_insert_deletes_consolidate.cpp @@ -152,14 +152,14 @@ void build_incremental_index(const std::string &data_path, diskann::IndexWritePa using TagT = uint32_t; auto data_type = diskann_type_to_name(); auto tag_type = diskann_type_to_name(); + auto index_search_params = diskann::IndexSearchParams(params.search_list_size, params.num_threads); diskann::IndexConfig index_config = diskann::IndexConfigBuilder() .with_metric(diskann::L2) .with_dimension(dim) .with_max_points(max_points_to_insert) 
.is_dynamic_index(true) .with_index_write_params(params) - .with_search_threads(params.num_threads) - .with_initial_search_list_size(params.search_list_size) + .with_index_search_params(index_search_params) .with_data_type(data_type) .with_tag_type(tag_type) .with_data_load_store_strategy(diskann::MEMORY) diff --git a/apps/test_streaming_scenario.cpp b/apps/test_streaming_scenario.cpp index 55e4e61cf..c40ee251e 100644 --- a/apps/test_streaming_scenario.cpp +++ b/apps/test_streaming_scenario.cpp @@ -186,6 +186,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con .with_num_frozen_points(num_start_pts) .build(); + auto index_search_params = diskann::IndexSearchParams(L, insert_threads); diskann::IndexWriteParameters delete_params = diskann::IndexWriteParametersBuilder(L, R) .with_max_occlusion_size(C) .with_alpha(alpha) @@ -200,7 +201,6 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con diskann::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" << std::endl; aligned_dim = ROUND_UP(dim, 8); - auto index_config = diskann::IndexConfigBuilder() .with_metric(diskann::L2) .with_dimension(dim) @@ -210,12 +210,11 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con .is_use_opq(false) .with_num_pq_chunks(0) .is_pq_dist_build(false) - .with_search_threads(insert_threads) - .with_initial_search_list_size(L) .with_tag_type(diskann_type_to_name()) .with_label_type(diskann_type_to_name()) .with_data_type(diskann_type_to_name()) .with_index_write_params(params) + .with_index_search_params(index_search_params) .with_data_load_store_strategy(diskann::MEMORY) .build(); diff --git a/apps/utils/count_bfs_levels.cpp b/apps/utils/count_bfs_levels.cpp index 35b50dae0..1ec8225db 100644 --- a/apps/utils/count_bfs_levels.cpp +++ b/apps/utils/count_bfs_levels.cpp @@ -27,7 +27,7 @@ template void bfs_count(const std::string &index_path, uint32_t dat { 
using TagT = uint32_t; using LabelT = uint32_t; - diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, 0, 0, false, false); + diskann::Index index(diskann::Metric::L2, data_dims, 0, nullptr, nullptr, 0, false, false); std::cout << "Index class instantiated" << std::endl; index.load(index_path.c_str(), 1, 100); std::cout << "Index loaded" << std::endl; diff --git a/include/index.h b/include/index.h index 8b7f6c1ec..a1969ff8e 100644 --- a/include/index.h +++ b/include/index.h @@ -52,10 +52,11 @@ template clas // For internal use - uses new constructor internally DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, const std::shared_ptr &indexParameters, - const uint32_t initial_search_list_size, const size_t num_frozen_pts = 0, - const bool dynamic_index = false, const bool enable_tags = false, - const bool concurrent_consolidate = false, const bool pq_dist_build = false, - const size_t num_pq_chunks = 0, const bool use_opq = false); + const std::shared_ptr &indexSearchParams, + const size_t num_frozen_pts = 0, const bool dynamic_index = false, + const bool enable_tags = false, const bool concurrent_consolidate = false, + const bool pq_dist_build = false, const size_t num_pq_chunks = 0, + const bool use_opq = false); DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store /* std::unique_ptr graph_store*/); diff --git a/include/index_config.h b/include/index_config.h index 7b37c12d1..f9ecbf806 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -34,23 +34,20 @@ struct IndexConfig std::string data_type; std::shared_ptr index_write_params; - - uint32_t search_threads; - uint32_t initial_search_list_size; + std::shared_ptr index_search_params; private: IndexConfig(DataStoreStrategy data_strategy, GraphStoreStrategy graph_strategy, Metric metric, size_t dimension, size_t max_points, size_t num_pq_chunks, size_t num_frozen_points, bool dynamic_index, bool enable_tags, bool pq_dist_build, bool 
concurrent_consolidate, bool use_opq, const std::string &data_type, const std::string &tag_type, const std::string &label_type, - std::shared_ptr index_write_params, uint32_t search_threads, - uint32_t initial_search_list_size) + std::shared_ptr index_write_params, + std::shared_ptr index_search_params) : data_strategy(data_strategy), graph_strategy(graph_strategy), metric(metric), dimension(dimension), max_points(max_points), dynamic_index(dynamic_index), enable_tags(enable_tags), pq_dist_build(pq_dist_build), concurrent_consolidate(concurrent_consolidate), use_opq(use_opq), num_pq_chunks(num_pq_chunks), num_frozen_pts(num_frozen_points), label_type(label_type), tag_type(tag_type), data_type(data_type), - index_write_params(index_write_params), search_threads(search_threads), - initial_search_list_size(initial_search_list_size) + index_write_params(index_write_params), index_search_params(index_search_params) { } @@ -162,21 +159,27 @@ class IndexConfigBuilder { if (index_write_params_ptr == nullptr) { + diskann::cout << "Passed, empty build_params while creating index config" << std::endl; return *this; } this->_index_write_params = index_write_params_ptr; return *this; } - IndexConfigBuilder &with_search_threads(uint32_t search_threads) + IndexConfigBuilder &with_index_search_params(IndexSearchParams &search_params) { - this->_search_threads = search_threads; + this->_index_search_params = std::make_shared(search_params); return *this; } - IndexConfigBuilder &with_initial_search_list_size(uint32_t search_list_size) + IndexConfigBuilder &with_index_search_params(std::shared_ptr search_params_ptr) { - this->_initial_search_list_size = search_list_size; + if (search_params_ptr == nullptr) + { + diskann::cout << "Passed, empty search_params while creating index config" << std::endl; + return *this; + } + this->_index_search_params = search_params_ptr; return *this; } @@ -185,19 +188,18 @@ class IndexConfigBuilder if (_data_type == "" || _data_type.empty()) throw 
ANNException("Error: data_type can not be empty", -1); - if (_dynamic_index && _index_write_params != nullptr) + if (_dynamic_index && _index_search_params != nullptr) { - if (_search_threads == 0) + if (_index_search_params->num_search_threads == 0) throw ANNException("Error: please pass search_threads for building dynamic index.", -1); - if (_initial_search_list_size == 0) + if (_index_search_params->initial_search_list_size == 0) throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); } return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, _num_frozen_pts, _dynamic_index, _enable_tags, _pq_dist_build, _concurrent_consolidate, - _use_opq, _data_type, _tag_type, _label_type, _index_write_params, _search_threads, - _initial_search_list_size); + _use_opq, _data_type, _tag_type, _label_type, _index_write_params, _index_search_params); } IndexConfigBuilder(const IndexConfigBuilder &) = delete; @@ -225,8 +227,6 @@ class IndexConfigBuilder std::string _data_type; std::shared_ptr _index_write_params; - - uint32_t _search_threads; - uint32_t _initial_search_list_size; + std::shared_ptr _index_search_params; }; } // namespace diskann diff --git a/include/parameters.h b/include/parameters.h index 81a336da7..209b9128c 100644 --- a/include/parameters.h +++ b/include/parameters.h @@ -38,6 +38,17 @@ class IndexWriteParameters friend class IndexWriteParametersBuilder; }; +class IndexSearchParams +{ + public: + IndexSearchParams(const uint32_t initial_search_list_size, const uint32_t num_search_threads) + : initial_search_list_size(initial_search_list_size), num_search_threads(num_search_threads) + { + } + const uint32_t initial_search_list_size; // search L + const uint32_t num_search_threads; // search threads +}; + class IndexWriteParametersBuilder { /** diff --git a/python/include/static_disk_index.h b/python/include/static_disk_index.h index 71a1b5aff..4a399ab3e 100644 --- 
a/python/include/static_disk_index.h +++ b/python/include/static_disk_index.h @@ -6,7 +6,6 @@ #include #include - #include #include @@ -21,7 +20,8 @@ namespace py = pybind11; -namespace diskannpy { +namespace diskannpy +{ #ifdef _WINDOWS typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; @@ -29,8 +29,7 @@ typedef WindowsAlignedFileReader PlatformSpecificAlignedFileReader; typedef LinuxAlignedFileReader PlatformSpecificAlignedFileReader; #endif -template -class StaticDiskIndex +template class StaticDiskIndex { public: StaticDiskIndex(diskann::Metric metric, const std::string &index_path_prefix, uint32_t num_threads, @@ -40,13 +39,15 @@ class StaticDiskIndex void cache_sample_paths(size_t num_nodes_to_cache, const std::string &warmup_query_file, uint32_t num_threads); - NeighborsAndDistances search(py::array_t &query, uint64_t knn, - uint64_t complexity, uint64_t beam_width); + NeighborsAndDistances search(py::array_t &query, + uint64_t knn, uint64_t complexity, uint64_t beam_width); + + NeighborsAndDistances batch_search( + py::array_t &queries, uint64_t num_queries, uint64_t knn, + uint64_t complexity, uint64_t beam_width, uint32_t num_threads); - NeighborsAndDistances batch_search(py::array_t &queries, uint64_t num_queries, - uint64_t knn, uint64_t complexity, uint64_t beam_width, uint32_t num_threads); private: std::shared_ptr _reader; diskann::PQFlashIndex
_index; }; -} +} // namespace diskannpy diff --git a/python/src/builder.cpp b/python/src/builder.cpp index 1ec81f01c..2e593e72b 100644 --- a/python/src/builder.cpp +++ b/python/src/builder.cpp @@ -44,11 +44,15 @@ void build_memory_index(const diskann::Metric metric, const std::string &vector_ .with_saturate_graph(false) .with_num_threads(num_threads) .build(); + diskann::IndexSearchParams index_search_params = + diskann::IndexSearchParams(index_build_params.search_list_size, num_threads); size_t data_num, data_dim; diskann::get_bin_metadata(vector_bin_path, data_num, data_dim); - diskann::Index index( - metric, data_dim, data_num, std::make_shared(index_build_params), - index_build_params.search_list_size, 0, use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); + + diskann::Index index(metric, data_dim, data_num, + std::make_shared(index_build_params), + std::make_shared(index_search_params), 0, + use_tags, use_tags, false, use_pq_build, num_pq_bytes, use_opq); if (use_tags) { diff --git a/python/src/dynamic_memory_index.cpp b/python/src/dynamic_memory_index.cpp index ca1a62dad..f92f4157e 100644 --- a/python/src/dynamic_memory_index.cpp +++ b/python/src/dynamic_memory_index.cpp @@ -36,13 +36,15 @@ diskann::Index dynamic_index_builder(const diskann:: { const uint32_t _initial_search_threads = initial_search_threads != 0 ? 
initial_search_threads : omp_get_num_threads(); + + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, _initial_search_threads); return diskann::Index( m, dimensions, max_vectors, - std::make_shared(write_params), // index write params - initial_search_complexity, // initial_search_list_size - write_params.num_frozen_points, // frozen_points - true, // dynamic_index - true, // enable_tags + std::make_shared(write_params), // index write params + std::make_shared(index_search_params), // index_search_params + write_params.num_frozen_points, // frozen_points + true, // dynamic_index + true, // enable_tags concurrent_consolidation, false, // pq_dist_build 0, // num_pq_chunks diff --git a/python/src/static_memory_index.cpp b/python/src/static_memory_index.cpp index 50296fa73..82464c5f1 100644 --- a/python/src/static_memory_index.cpp +++ b/python/src/static_memory_index.cpp @@ -17,13 +17,13 @@ diskann::Index static_index_builder(const diskann::Me { throw std::runtime_error("initial_search_complexity must be a positive uint32_t"); } - + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, 0); return diskann::Index
(m, dimensions, num_points, - nullptr, // index write params - initial_search_complexity, - 0, // num frozen points - false, // not a dynamic_index - false, // no enable_tags/ids + nullptr, // index write params + std::make_shared(index_search_params), // index search params + 0, // num frozen points + false, // not a dynamic_index + false, // no enable_tags/ids false, // no concurrent_consolidate, false, // pq_dist_build 0, // num_pq_chunks diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 7d63846c0..30ca436a0 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -636,7 +636,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr .build(); using TagT = uint32_t; diskann::Index _index( - compareMetric, base_dim, base_num, std::make_shared(paras), 0, + compareMetric, base_dim, base_num, std::make_shared(paras), nullptr, paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) _index.build(base_file.c_str(), base_num, paras); @@ -697,9 +697,9 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); - diskann::Index _index(compareMetric, shard_base_dim, shard_base_pts, - std::make_shared(paras), 0, paras.num_frozen_points, - false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); + diskann::Index _index( + compareMetric, shard_base_dim, shard_base_pts, std::make_shared(paras), + nullptr, paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); if (!use_filters) { _index.build(shard_base_file.c_str(), shard_base_pts, paras); diff --git a/src/filter_utils.cpp b/src/filter_utils.cpp index 6887c6e54..618666488 100644 --- a/src/filter_utils.cpp +++ b/src/filter_utils.cpp @@ -45,7 +45,8 @@ void generate_label_indices(path input_data_path, path final_index_path_prefix, size_t number_of_label_points, 
dimension; diskann::get_bin_metadata(curr_label_input_data_path, number_of_label_points, dimension); - diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, 0, 0, false, false); + diskann::Index index(diskann::Metric::L2, dimension, number_of_label_points, nullptr, nullptr, 0, false, + false); auto index_build_timer = std::chrono::high_resolution_clock::now(); index.build(curr_label_input_data_path.c_str(), number_of_label_points, label_index_build_parameters); diff --git a/src/index.cpp b/src/index.cpp index 1b27187b2..5456655f0 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -54,6 +54,11 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr::value) + { + this->_normalize_vecs = true; + } + if (_dynamic_index && _num_frozen_pts == 0) { _num_frozen_pts = 1; @@ -91,7 +96,7 @@ Index::Index(const IndexConfig &index_config, std::unique_ptrenable_delete(); // enable delete by default for dynamic index // if write params are not passed, it is inffered that ctor is called by search - if (index_config.index_write_params != nullptr) + if (index_config.index_write_params != nullptr && index_config.index_search_params != nullptr) { _indexingQueueSize = index_config.index_write_params->search_list_size; _indexingRange = index_config.index_write_params->max_degree; @@ -100,10 +105,10 @@ Index::Index(const IndexConfig &index_config, std::unique_ptrfilter_list_size; uint32_t num_threads_indx = index_config.index_write_params->num_threads; - uint32_t num_scratch_spaces = index_config.search_threads + num_threads_indx; + uint32_t num_scratch_spaces = index_config.index_search_params->num_search_threads + num_threads_indx; - initialize_query_scratch(num_scratch_spaces, index_config.initial_search_list_size, _indexingQueueSize, - _indexingRange, _indexingMaxC, _data_store->get_dims()); + initialize_query_scratch(num_scratch_spaces, index_config.index_search_params->initial_search_list_size, + _indexingQueueSize, _indexingRange, 
_indexingMaxC, _data_store->get_dims()); } } } @@ -111,7 +116,7 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr Index::Index(Metric m, const size_t dim, const size_t max_points, const std::shared_ptr &indexParameters, - const uint32_t initial_search_list_size, const size_t num_frozen_pts, + const std::shared_ptr &indexSearchParams, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) : Index(IndexConfigBuilder() @@ -119,7 +124,7 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_dimension(dim) .with_max_points(max_points) .with_index_write_params(indexParameters) - .with_initial_search_list_size(initial_search_list_size) + .with_index_search_params(indexSearchParams) .with_num_frozen_pts(num_frozen_pts) .is_dynamic_index(dynamic_index) .is_enable_tags(enable_tags) @@ -128,7 +133,6 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_num_pq_chunks(num_pq_chunks) .is_use_opq(use_opq) .with_data_type(diskann_type_to_name()) - .with_search_threads(indexParameters != nullptr ? 
indexParameters->num_threads : 0) .build(), std::move(IndexFactory::construct_datastore(diskann::MEMORY, max_points + num_frozen_pts, dim, m))) { From 8fd6a681831e679e97baadaa7c64efac9cbe8d36 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 11:52:44 -0400 Subject: [PATCH 11/24] setting search threads to non zero --- python/src/static_memory_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/static_memory_index.cpp b/python/src/static_memory_index.cpp index 82464c5f1..0dbb24dc3 100644 --- a/python/src/static_memory_index.cpp +++ b/python/src/static_memory_index.cpp @@ -17,7 +17,7 @@ diskann::Index static_index_builder(const diskann::Me { throw std::runtime_error("initial_search_complexity must be a positive uint32_t"); } - auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, 0); + auto index_search_params = diskann::IndexSearchParams(initial_search_complexity, omp_get_num_threads()); return diskann::Index
(m, dimensions, num_points, nullptr, // index write params std::make_shared(index_search_params), // index search params From 42e43bac89072207b20af6443004a6abc5be08b2 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 12:05:52 -0400 Subject: [PATCH 12/24] minor check removed --- include/index_config.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/index_config.h b/include/index_config.h index f9ecbf806..02537c661 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -190,9 +190,6 @@ class IndexConfigBuilder if (_dynamic_index && _index_search_params != nullptr) { - if (_index_search_params->num_search_threads == 0) - throw ANNException("Error: please pass search_threads for building dynamic index.", -1); - if (_index_search_params->initial_search_list_size == 0) throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); } From 1ab5bdcefbf8d617d23e5d2a96d94bd26a42ff75 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 13:53:16 -0400 Subject: [PATCH 13/24] experiment 3-> making distance fully owned by data_store --- include/index.h | 2 +- src/index.cpp | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/index.h b/include/index.h index a1969ff8e..c8c4ee8b9 100644 --- a/include/index.h +++ b/include/index.h @@ -323,7 +323,7 @@ template clas private: // Distance functions Metric _dist_metric = diskann::L2; - std::shared_ptr> _distance; + //std::shared_ptr> _distance; // Data std::unique_ptr> _data_store; diff --git a/src/index.cpp b/src/index.cpp index 5456655f0..606fd5bfa 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -83,7 +83,7 @@ Index::Index(const IndexConfig &index_config, std::unique_ptrget_dist_fn()); + //_distance.reset(_data_store->get_dist_fn()); _locks = std::vector(total_internal_points); if (_enable_tags) @@ -2137,7 +2137,8 @@ std::pair Index::search(const T *query, con std::shared_lock lock(_update_lock); - 
_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, false, unused_filter_label, true); @@ -2239,7 +2240,8 @@ std::pair Index::search_with_filters(const // REFACTOR // T *aligned_query = scratch->aligned_query(); // memcpy(aligned_query, query, _dim * sizeof(T)); - _distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, true, filter_vec, true); auto best_L_nodes = scratch->best_l_nodes(); @@ -2318,7 +2320,8 @@ size_t Index::search_with_tags(const T *query, const uint64_t K const std::vector init_ids = get_init_ids(); const std::vector unused_filter_label; - _distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, false, unused_filter_label, true); NeighborPriorityQueue &best_L_nodes = scratch->best_l_nodes(); From 9e2a01d7d2b08ea4951041e535acc09fcdb4e04d Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 13:54:42 -0400 Subject: [PATCH 14/24] exp 3 clang fix --- include/index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/index.h b/include/index.h index c8c4ee8b9..5fa94ed48 100644 --- a/include/index.h +++ 
b/include/index.h @@ -323,7 +323,7 @@ template clas private: // Distance functions Metric _dist_metric = diskann::L2; - //std::shared_ptr> _distance; + // std::shared_ptr> _distance; // Data std::unique_ptr> _data_store; From 8267402ef35569894cdb2befe1942d1248cc404f Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 14:32:25 -0400 Subject: [PATCH 15/24] exp 4 --- include/index.h | 11 +++++------ src/index.cpp | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/index.h b/include/index.h index 5fa94ed48..c8a2a9b1c 100644 --- a/include/index.h +++ b/include/index.h @@ -51,12 +51,11 @@ template clas public: // For internal use - uses new constructor internally DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, - const std::shared_ptr &indexParameters, - const std::shared_ptr &indexSearchParams, - const size_t num_frozen_pts = 0, const bool dynamic_index = false, - const bool enable_tags = false, const bool concurrent_consolidate = false, - const bool pq_dist_build = false, const size_t num_pq_chunks = 0, - const bool use_opq = false); + const std::shared_ptr indexParameters, + const std::shared_ptr indexSearchParams, const size_t num_frozen_pts = 0, + const bool dynamic_index = false, const bool enable_tags = false, + const bool concurrent_consolidate = false, const bool pq_dist_build = false, + const size_t num_pq_chunks = 0, const bool use_opq = false); DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store /* std::unique_ptr graph_store*/); diff --git a/src/index.cpp b/src/index.cpp index 606fd5bfa..b5407d364 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -115,8 +115,8 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr Index::Index(Metric m, const size_t dim, const size_t max_points, - const std::shared_ptr &indexParameters, - const std::shared_ptr &indexSearchParams, const size_t num_frozen_pts, + const std::shared_ptr indexParameters, + const 
std::shared_ptr indexSearchParams, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) : Index(IndexConfigBuilder() From 406862e3c5fe5e6a9df92df34dd9f8fdd1961827 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 15:47:51 -0400 Subject: [PATCH 16/24] making distance as unique_ptr --- include/in_mem_data_store.h | 4 ++-- include/index.h | 4 ++-- src/disk_utils.cpp | 1 + src/in_mem_data_store.cpp | 4 ++-- src/index.cpp | 1 - src/index_factory.cpp | 7 ++++--- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/include/in_mem_data_store.h b/include/in_mem_data_store.h index 0509b3b82..9b6968b03 100644 --- a/include/in_mem_data_store.h +++ b/include/in_mem_data_store.h @@ -21,7 +21,7 @@ namespace diskann template class InMemDataStore : public AbstractDataStore { public: - InMemDataStore(const location_t capacity, const size_t dim, std::shared_ptr> distance_fn); + InMemDataStore(const location_t capacity, const size_t dim, std::unique_ptr> distance_fn); virtual ~InMemDataStore(); virtual location_t load(const std::string &filename) override; @@ -73,7 +73,7 @@ template class InMemDataStore : public AbstractDataStore> _distance_fn; + std::unique_ptr> _distance_fn; // in case we need to save vector norms for optimization std::shared_ptr _pre_computed_norms; diff --git a/include/index.h b/include/index.h index c8a2a9b1c..6eb365500 100644 --- a/include/index.h +++ b/include/index.h @@ -49,7 +49,7 @@ template clas **************************************************************************/ public: - // For internal use - uses new constructor internally + // For index level use - uses new constructor internally DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, const std::shared_ptr indexParameters, const std::shared_ptr indexSearchParams, const size_t num_frozen_pts = 0, @@ -57,6 +57,7 @@ 
template clas const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, const bool use_opq = false); + // use functionality compatible with abstract index DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store /* std::unique_ptr graph_store*/); @@ -322,7 +323,6 @@ template clas private: // Distance functions Metric _dist_metric = diskann::L2; - // std::shared_ptr> _distance; // Data std::unique_ptr> _data_store; diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index 30ca436a0..84c2a654c 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -697,6 +697,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr uint64_t shard_base_dim, shard_base_pts; get_bin_metadata(shard_base_file, shard_base_pts, shard_base_dim); + diskann::Index _index( compareMetric, shard_base_dim, shard_base_pts, std::make_shared(paras), nullptr, paras.num_frozen_points, false, false, false, build_pq_bytes > 0, build_pq_bytes, use_opq); diff --git a/src/in_mem_data_store.cpp b/src/in_mem_data_store.cpp index f5f973917..7d02bba17 100644 --- a/src/in_mem_data_store.cpp +++ b/src/in_mem_data_store.cpp @@ -11,8 +11,8 @@ namespace diskann template InMemDataStore::InMemDataStore(const location_t num_points, const size_t dim, - std::shared_ptr> distance_fn) - : AbstractDataStore(num_points, dim), _distance_fn(distance_fn) + std::unique_ptr> distance_fn) + : AbstractDataStore(num_points, dim), _distance_fn(std::move(distance_fn)) { _aligned_dim = ROUND_UP(dim, _distance_fn->get_required_alignment()); alloc_aligned(((void **)&_data), this->_capacity * _aligned_dim * sizeof(data_t), 8 * sizeof(data_t)); diff --git a/src/index.cpp b/src/index.cpp index b5407d364..26937b4f6 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -83,7 +83,6 @@ Index::Index(const IndexConfig &index_config, std::unique_ptrget_dist_fn()); _locks = std::vector(total_internal_points); if (_enable_tags) diff --git 
a/src/index_factory.cpp b/src/index_factory.cpp index 43b303422..5507117a4 100644 --- a/src/index_factory.cpp +++ b/src/index_factory.cpp @@ -53,19 +53,20 @@ template std::unique_ptr> IndexFactory::construct_datastore(DataStoreStrategy strategy, size_t num_points, size_t dimension, Metric m) { - std::shared_ptr> distance; + std::unique_ptr> distance; switch (strategy) { case MEMORY: if (m == diskann::Metric::COSINE && std::is_same::value) { distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - return std::make_unique>((location_t)num_points, dimension, distance); + + return std::make_unique>((location_t)num_points, dimension, std::move(distance)); } else { distance.reset((Distance *)get_distance_function(m)); - return std::make_unique>((location_t)num_points, dimension, distance); + return std::make_unique>((location_t)num_points, dimension, std::move(distance)); } break; default: From df0310c03a481708cb3e55c0ced45fc6848b448c Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 18:16:38 -0400 Subject: [PATCH 17/24] trying to fix build --- include/index_config.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/index_config.h b/include/index_config.h index 02537c661..d08579e16 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -188,6 +188,11 @@ class IndexConfigBuilder if (_data_type == "" || _data_type.empty()) throw ANNException("Error: data_type can not be empty", -1); + if (_dynamic_index && _num_frozen_pts == 0) + { + _num_frozen_pts = 1; + } + if (_dynamic_index && _index_search_params != nullptr) { if (_index_search_params->initial_search_list_size == 0) From 9c7f5caac82af44dc959ed7c66f0d45b4b1fda28 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Fri, 11 Aug 2023 19:24:06 -0400 Subject: [PATCH 18/24] finally fixing problem --- src/index.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/index.cpp b/src/index.cpp index 26937b4f6..e2b28f02d 100644 --- a/src/index.cpp +++ 
b/src/index.cpp @@ -133,7 +133,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .is_use_opq(use_opq) .with_data_type(diskann_type_to_name()) .build(), - std::move(IndexFactory::construct_datastore(diskann::MEMORY, max_points + num_frozen_pts, dim, m))) + std::move(IndexFactory::construct_datastore( + diskann::MEMORY, max_points + (dynamic_index && num_frozen_pts == 0 ? 1 : num_frozen_pts), dim, m))) { } From e829125499108c78ac1b3b4c2bb07a35d778516c Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Sat, 12 Aug 2023 16:30:39 -0400 Subject: [PATCH 19/24] some minor fix --- src/index.cpp | 3 ++- src/index_factory.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/index.cpp b/src/index.cpp index e2b28f02d..0f56e557e 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -134,7 +134,8 @@ Index::Index(Metric m, const size_t dim, const size_t max_point .with_data_type(diskann_type_to_name()) .build(), std::move(IndexFactory::construct_datastore( - diskann::MEMORY, max_points + (dynamic_index && num_frozen_pts == 0 ? 1 : num_frozen_pts), dim, m))) + diskann::MEMORY, max_points + (dynamic_index && num_frozen_pts == 0 ? 
(size_t)1 : num_frozen_pts), dim, + m))) { } diff --git a/src/index_factory.cpp b/src/index_factory.cpp index 5507117a4..013a47d3d 100644 --- a/src/index_factory.cpp +++ b/src/index_factory.cpp @@ -60,7 +60,6 @@ std::unique_ptr> IndexFactory::construct_datastore(DataStor if (m == diskann::Metric::COSINE && std::is_same::value) { distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); - return std::make_unique>((location_t)num_points, dimension, std::move(distance)); } else @@ -86,7 +85,8 @@ std::unique_ptr IndexFactory::create_instance() size_t num_points = _config->max_points + _config->num_frozen_pts; size_t dim = _config->dimension; // auto graph_store = construct_graphstore(_config->graph_strategy, num_points); - auto data_store = construct_datastore(_config->data_strategy, num_points, dim, _config->metric); + auto data_store = + IndexFactory::construct_datastore(_config->data_strategy, num_points, dim, _config->metric); return std::make_unique>(*_config, std::move(data_store)); } From e37505d1748d93eaa64e3274a4bf828405a27955 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Sat, 12 Aug 2023 17:01:31 -0400 Subject: [PATCH 20/24] adding dll export to index_factory static function --- include/index_factory.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/index_factory.h b/include/index_factory.h index 09f2ac441..dd839e07f 100644 --- a/include/index_factory.h +++ b/include/index_factory.h @@ -11,8 +11,9 @@ class IndexFactory DISKANN_DLLEXPORT std::unique_ptr create_instance(); template - static std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, - size_t dimension, Metric m); + DISKANN_DLLEXPORT static std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, + size_t num_points, + size_t dimension, Metric m); private: void check_config(); From 78d778c0bc48f1e189739e924f745df66dec40db Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Sat, 12 Aug 2023 17:17:11 -0400 Subject: [PATCH 
21/24] adding dll export for static fn in index_factory --- src/index_factory.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/index_factory.cpp b/src/index_factory.cpp index 013a47d3d..88ac44a16 100644 --- a/src/index_factory.cpp +++ b/src/index_factory.cpp @@ -147,4 +147,11 @@ std::unique_ptr IndexFactory::create_instance(const std::string & throw ANNException("Error: unsupported label_type please choose from [uint/ushort]", -1); } +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); +template DISKANN_DLLEXPORT std::unique_ptr> IndexFactory::construct_datastore( + DataStoreStrategy stratagy, size_t num_points, size_t dimension, Metric m); + } // namespace diskann From 3574428cc802a6eee189f703213848ccf8e59bbe Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Mon, 14 Aug 2023 09:48:45 -0400 Subject: [PATCH 22/24] code cleanup --- include/index.h | 4 ++-- include/index_config.h | 2 ++ include/index_factory.h | 1 + src/index.cpp | 3 +-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/index.h b/include/index.h index 6eb365500..1288069dd 100644 --- a/include/index.h +++ b/include/index.h @@ -49,7 +49,7 @@ template clas **************************************************************************/ public: - // For index level use - uses new constructor internally + // Call this when creating and passing Index Config is inconvenient. 
DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, const std::shared_ptr indexParameters, const std::shared_ptr indexSearchParams, const size_t num_frozen_pts = 0, @@ -57,7 +57,7 @@ template clas const bool concurrent_consolidate = false, const bool pq_dist_build = false, const size_t num_pq_chunks = 0, const bool use_opq = false); - // use functionality compatible with abstract index + // This is called by IndexFactory which returns AbstractIndex's simplified API DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store /* std::unique_ptr graph_store*/); diff --git a/include/index_config.h b/include/index_config.h index d08579e16..b4af308c2 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -33,7 +33,9 @@ struct IndexConfig std::string tag_type; std::string data_type; + // Params for building index std::shared_ptr index_write_params; + // Params for searching index std::shared_ptr index_search_params; private: diff --git a/include/index_factory.h b/include/index_factory.h index dd839e07f..7ad0893cc 100644 --- a/include/index_factory.h +++ b/include/index_factory.h @@ -10,6 +10,7 @@ class IndexFactory DISKANN_DLLEXPORT explicit IndexFactory(const IndexConfig &config); DISKANN_DLLEXPORT std::unique_ptr create_instance(); + // Consruct a data store with distance function emplaced within template DISKANN_DLLEXPORT static std::unique_ptr> construct_datastore(DataStoreStrategy stratagy, size_t num_points, diff --git a/src/index.cpp b/src/index.cpp index 0f56e557e..574b9fb39 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2139,7 +2139,7 @@ std::pair Index::search(const T *query, con std::shared_lock lock(_update_lock); _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); - //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); + auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, 
false, unused_filter_label, true); @@ -2242,7 +2242,6 @@ std::pair Index::search_with_filters(const // T *aligned_query = scratch->aligned_query(); // memcpy(aligned_query, query, _dim * sizeof(T)); _data_store->get_dist_fn()->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); - //_distance->preprocess_query(query, _data_store->get_dims(), scratch->aligned_query()); auto retval = iterate_to_fixed_point(scratch->aligned_query(), L, init_ids, scratch, true, filter_vec, true); auto best_L_nodes = scratch->best_l_nodes(); From f63caeeec991cfbe6769f41fc6a39812bed77633 Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Tue, 15 Aug 2023 14:29:29 -0400 Subject: [PATCH 23/24] resolving gopal's comments --- include/index.h | 11 ++++++----- include/index_config.h | 6 ++++-- src/index.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/index.h b/include/index.h index 1288069dd..095d1599a 100644 --- a/include/index.h +++ b/include/index.h @@ -51,11 +51,12 @@ template clas public: // Call this when creating and passing Index Config is inconvenient. 
DISKANN_DLLEXPORT Index(Metric m, const size_t dim, const size_t max_points, - const std::shared_ptr indexParameters, - const std::shared_ptr indexSearchParams, const size_t num_frozen_pts = 0, - const bool dynamic_index = false, const bool enable_tags = false, - const bool concurrent_consolidate = false, const bool pq_dist_build = false, - const size_t num_pq_chunks = 0, const bool use_opq = false); + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, + const size_t num_frozen_pts = 0, const bool dynamic_index = false, + const bool enable_tags = false, const bool concurrent_consolidate = false, + const bool pq_dist_build = false, const size_t num_pq_chunks = 0, + const bool use_opq = false); // This is called by IndexFactory which returns AbstractIndex's simplified API DISKANN_DLLEXPORT Index(const IndexConfig &index_config, std::unique_ptr> data_store diff --git a/include/index_config.h b/include/index_config.h index b4af308c2..75a266ebb 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -195,10 +195,12 @@ class IndexConfigBuilder _num_frozen_pts = 1; } - if (_dynamic_index && _index_search_params != nullptr) + if (_dynamic_index) { - if (_index_search_params->initial_search_list_size == 0) + if (_index_search_params != nullptr && _index_search_params->initial_search_list_size == 0) throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); + if (_index_write_params == nullptr) + throw ANNException("Error: please pass index_write_params for dynamic index", -1); } return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks, diff --git a/src/index.cpp b/src/index.cpp index 574b9fb39..eeb7169e1 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -114,16 +114,16 @@ Index::Index(const IndexConfig &index_config, std::unique_ptr Index::Index(Metric m, const size_t dim, const size_t max_points, - const std::shared_ptr indexParameters, - const 
std::shared_ptr indexSearchParams, const size_t num_frozen_pts, + const std::shared_ptr index_parameters, + const std::shared_ptr index_search_params, const size_t num_frozen_pts, const bool dynamic_index, const bool enable_tags, const bool concurrent_consolidate, const bool pq_dist_build, const size_t num_pq_chunks, const bool use_opq) : Index(IndexConfigBuilder() .with_metric(m) .with_dimension(dim) .with_max_points(max_points) - .with_index_write_params(indexParameters) - .with_index_search_params(indexSearchParams) + .with_index_write_params(index_parameters) + .with_index_search_params(index_search_params) .with_num_frozen_pts(num_frozen_pts) .is_dynamic_index(dynamic_index) .is_enable_tags(enable_tags) From c4aec961115e56272c39b6f00ee2f279e690675c Mon Sep 17 00:00:00 2001 From: yashpatel007 Date: Tue, 15 Aug 2023 15:06:43 -0400 Subject: [PATCH 24/24] resolving build failures --- apps/build_memory_index.cpp | 45 ------------------------------------- include/index_config.h | 2 -- 2 files changed, 47 deletions(-) diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index a5bbe498c..1d6f0e7c6 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -22,51 +22,6 @@ namespace po = boost::program_options; -template -int build_in_memory_index(const diskann::Metric &metric, const std::string &data_path, const uint32_t R, - const uint32_t L, const float alpha, const std::string &save_path, const uint32_t num_threads, - const bool use_pq_build, const size_t num_pq_bytes, const bool use_opq, - const std::string &label_file, const std::string &universal_label, const uint32_t Lf) -{ - diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder(L, R) - .with_filter_list_size(Lf) - .with_alpha(alpha) - .with_saturate_graph(false) - .with_num_threads(num_threads) - .build(); - std::string labels_file_to_use = save_path + "_label_formatted.txt"; - std::string mem_labels_int_map_file = save_path + "_labels_map.txt"; - - 
size_t data_num, data_dim; - diskann::get_bin_metadata(data_path, data_num, data_dim); - - diskann::Index index(metric, data_dim, data_num, - std::make_shared(paras), nullptr, 0, false, - false, false, use_pq_build, num_pq_bytes, use_opq); - auto s = std::chrono::high_resolution_clock::now(); - if (label_file == "") - { - index.build(data_path.c_str(), data_num, paras); - } - else - { - convert_labels_string_to_int(label_file, labels_file_to_use, mem_labels_int_map_file, universal_label); - if (universal_label != "") - { - LabelT unv_label_as_num = 0; - index.set_universal_label(unv_label_as_num); - } - index.build_filtered_index(data_path.c_str(), labels_file_to_use, data_num, paras); - } - std::chrono::duration diff = std::chrono::high_resolution_clock::now() - s; - - std::cout << "Indexing time: " << diff.count() << "\n"; - index.save(save_path.c_str()); - if (label_file != "") - std::remove(labels_file_to_use.c_str()); - return 0; -} - int main(int argc, char **argv) { std::string data_type, dist_fn, data_path, index_path_prefix, label_file, universal_label, label_type; diff --git a/include/index_config.h b/include/index_config.h index 75a266ebb..2a8e0e8ba 100644 --- a/include/index_config.h +++ b/include/index_config.h @@ -199,8 +199,6 @@ class IndexConfigBuilder { if (_index_search_params != nullptr && _index_search_params->initial_search_list_size == 0) throw ANNException("Error: please pass initial_search_list_size for building dynamic index.", -1); - if (_index_write_params == nullptr) - throw ANNException("Error: please pass index_write_params for dynamic index", -1); } return IndexConfig(_data_strategy, _graph_strategy, _metric, _dimension, _max_points, _num_pq_chunks,