From 7561009932ff109ed386c4f5d50983859e49b9e7 Mon Sep 17 00:00:00 2001 From: Dax Pryce Date: Wed, 12 Jul 2023 13:38:06 -0700 Subject: [PATCH] Removed the logger and verified that the logging capability is the root cause of our consistent segfault errors in python. Perhaps it also will fix any issues in our label test too? I'd like to push it to GH and see. --- apps/build_disk_index.cpp | 6 +- apps/build_memory_index.cpp | 4 +- apps/build_stitched_index.cpp | 4 +- apps/range_search_disk_index.cpp | 48 +++--- apps/search_disk_index.cpp | 56 +++---- apps/search_memory_index.cpp | 8 +- apps/test_streaming_scenario.cpp | 6 +- apps/utils/compute_groundtruth.cpp | 8 +- .../utils/compute_groundtruth_for_filters.cpp | 12 +- apps/utils/count_bfs_levels.cpp | 2 +- apps/utils/rand_data_gen.cpp | 2 +- include/cached_io.h | 9 +- include/concurrent_queue.h | 2 +- include/filter_utils.h | 4 +- include/logger.h | 30 ---- include/logger_impl.h | 65 -------- include/utils.h | 79 +++++----- src/CMakeLists.txt | 2 +- src/disk_utils.cpp | 87 ++++++----- src/distance.cpp | 35 +++-- src/filter_utils.cpp | 4 +- src/in_mem_data_store.cpp | 6 +- src/index.cpp | 142 +++++++++--------- src/linux_aligned_file_reader.cpp | 4 +- src/logger.cpp | 104 ------------- src/math_utils.cpp | 17 +-- src/memory_mapper.cpp | 5 +- src/partition.cpp | 38 ++--- src/pq.cpp | 52 +++---- src/pq_flash_index.cpp | 70 ++++----- src/utils.cpp | 14 +- src/windows_aligned_file_reader.cpp | 14 +- 32 files changed, 367 insertions(+), 572 deletions(-) delete mode 100644 include/logger.h delete mode 100644 include/logger_impl.h delete mode 100644 src/logger.cpp diff --git a/apps/build_disk_index.cpp b/apps/build_disk_index.cpp index 1edb027da..7bd5864fe 100644 --- a/apps/build_disk_index.cpp +++ b/apps/build_disk_index.cpp @@ -150,7 +150,7 @@ int main(int argc, char **argv) use_filters, label_file, universal_label, filter_threshold, Lf); else { - diskann::cerr << "Error. 
Unsupported data type" << std::endl; + std::cerr << "Error. Unsupported data type" << std::endl; return -1; } } @@ -170,7 +170,7 @@ int main(int argc, char **argv) universal_label, filter_threshold, Lf); else { - diskann::cerr << "Error. Unsupported data type" << std::endl; + std::cerr << "Error. Unsupported data type" << std::endl; return -1; } } @@ -178,7 +178,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index build failed." << std::endl; + std::cerr << "Index build failed." << std::endl; return -1; } } diff --git a/apps/build_memory_index.cpp b/apps/build_memory_index.cpp index d96ad7f50..13d08da3d 100644 --- a/apps/build_memory_index.cpp +++ b/apps/build_memory_index.cpp @@ -154,7 +154,7 @@ int main(int argc, char **argv) try { - diskann::cout << "Starting index build with R: " << R << " Lbuild: " << L << " alpha: " << alpha + std::cout << "Starting index build with R: " << R << " Lbuild: " << L << " alpha: " << alpha << " #threads: " << num_threads << std::endl; size_t data_num, data_dim; @@ -196,7 +196,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index build failed." << std::endl; + std::cerr << "Index build failed." 
<< std::endl; return -1; } } diff --git a/apps/build_stitched_index.cpp b/apps/build_stitched_index.cpp index 4c1941a9d..8b4c64734 100644 --- a/apps/build_stitched_index.cpp +++ b/apps/build_stitched_index.cpp @@ -270,7 +270,7 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p path label_data_path, uint32_t num_threads) { size_t dimension, number_of_label_points; - auto diskann_cout_buffer = diskann::cout.rdbuf(nullptr); + auto diskann_cout_buffer = std::cout.rdbuf(); auto std_cout_buffer = std::cout.rdbuf(nullptr); auto pruning_index_timer = std::chrono::high_resolution_clock::now(); @@ -285,7 +285,7 @@ void prune_and_save(path final_index_path_prefix, path full_index_path_prefix, p index.prune_all_neighbors(stitched_R, 750, 1.2); index.save((final_index_path_prefix).c_str()); - diskann::cout.rdbuf(diskann_cout_buffer); + std::cout.rdbuf(diskann_cout_buffer); std::cout.rdbuf(std_cout_buffer); std::chrono::duration pruning_index_time = std::chrono::high_resolution_clock::now() - pruning_index_timer; std::cout << "pruning performed in " << pruning_index_time.count() << " seconds\n" << std::endl; diff --git a/apps/range_search_disk_index.cpp b/apps/range_search_disk_index.cpp index 33a7283a7..71d4b5518 100644 --- a/apps/range_search_disk_index.cpp +++ b/apps/range_search_disk_index.cpp @@ -35,18 +35,18 @@ namespace po = boost::program_options; void print_stats(std::string category, std::vector percentiles, std::vector results) { - diskann::cout << std::setw(20) << category << ": " << std::flush; + std::cout << std::setw(20) << category << ": " << std::flush; for (uint32_t s = 0; s < percentiles.size(); s++) { - diskann::cout << std::setw(8) << percentiles[s] << "%"; + std::cout << std::setw(8) << percentiles[s] << "%"; } - diskann::cout << std::endl; - diskann::cout << std::setw(22) << " " << std::flush; + std::cout << std::endl; + std::cout << std::setw(22) << " " << std::flush; for (uint32_t s = 0; s < percentiles.size(); 
s++) { - diskann::cout << std::setw(9) << results[s]; + std::cout << std::setw(9) << results[s]; } - diskann::cout << std::endl; + std::cout << std::endl; } template @@ -58,11 +58,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre std::string disk_index_file = index_path_prefix + "_disk.index"; std::string warmup_query_file = index_path_prefix + "_sample_data.bin"; - diskann::cout << "Search parameters: #threads: " << num_threads << ", "; + std::cout << "Search parameters: #threads: " << num_threads << ", "; if (beamwidth <= 0) - diskann::cout << "beamwidth to be optimized for each L value" << std::endl; + std::cout << "beamwidth to be optimized for each L value" << std::endl; else - diskann::cout << " beamwidth: " << beamwidth << std::endl; + std::cout << " beamwidth: " << beamwidth << std::endl; // load query bin T *query = nullptr; @@ -79,7 +79,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre // groundtruth_ids, gt_num); // use for traditional truthset if (gt_num != query_num) { - diskann::cout << "Error. Mismatch in number of queries and ground truth data" << std::endl; + std::cout << "Error. Mismatch in number of queries and ground truth data" << std::endl; return -1; } calc_recall_flag = true; @@ -107,7 +107,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre } // cache bfs levels std::vector node_list; - diskann::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; + std::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; _pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list); // _pFlashIndex->generate_cache_list_from_sample_queries( // warmup_query_file, 15, 6, num_nodes_to_cache, num_threads, @@ -146,7 +146,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre } } } - diskann::cout << "Warming up index... 
" << std::flush; + std::cout << "Warming up index... " << std::flush; std::vector warmup_result_ids_64(warmup_num, 0); std::vector warmup_result_dists(warmup_num, 0); @@ -157,23 +157,23 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre warmup_result_ids_64.data() + (i * 1), warmup_result_dists.data() + (i * 1), 4); } - diskann::cout << "..done" << std::endl; + std::cout << "..done" << std::endl; } - diskann::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - diskann::cout.precision(2); + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(2); std::string recall_string = "Recall@rng=" + std::to_string(search_range); - diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) + std::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) << "CPU (s)"; if (calc_recall_flag) { - diskann::cout << std::setw(16) << recall_string << std::endl; + std::cout << std::setw(16) << recall_string << std::endl; } else - diskann::cout << std::endl; - diskann::cout << "===============================================================" + std::cout << std::endl; + std::cout << "===============================================================" "===========================================" << std::endl; @@ -247,18 +247,18 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre ratio_of_sums = (1.0 * total_true_positive) / (1.0 * total_positive); } - diskann::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps + std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios << std::setw(16) << mean_cpuus; if (calc_recall_flag) 
{ - diskann::cout << std::setw(16) << recall << "," << ratio_of_sums << std::endl; + std::cout << std::setw(16) << recall << "," << ratio_of_sums << std::endl; } else - diskann::cout << std::endl; + std::cout << std::endl; } - diskann::cout << "Done searching. " << std::endl; + std::cout << "Done searching. " << std::endl; diskann::aligned_free(query); if (warmup != nullptr) @@ -359,7 +359,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index search failed." << std::endl; + std::cerr << "Index search failed." << std::endl; return -1; } } diff --git a/apps/search_disk_index.cpp b/apps/search_disk_index.cpp index 1108da97e..0671b1f4e 100644 --- a/apps/search_disk_index.cpp +++ b/apps/search_disk_index.cpp @@ -32,18 +32,18 @@ namespace po = boost::program_options; void print_stats(std::string category, std::vector percentiles, std::vector results) { - diskann::cout << std::setw(20) << category << ": " << std::flush; + std::cout << std::setw(20) << category << ": " << std::flush; for (uint32_t s = 0; s < percentiles.size(); s++) { - diskann::cout << std::setw(8) << percentiles[s] << "%"; + std::cout << std::setw(8) << percentiles[s] << "%"; } - diskann::cout << std::endl; - diskann::cout << std::setw(22) << " " << std::flush; + std::cout << std::endl; + std::cout << std::setw(22) << " " << std::flush; for (uint32_t s = 0; s < percentiles.size(); s++) { - diskann::cout << std::setw(9) << results[s]; + std::cout << std::setw(9) << results[s]; } - diskann::cout << std::endl; + std::cout << std::endl; } template @@ -54,15 +54,15 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre const std::vector &Lvec, const float fail_if_recall_below, const std::vector &query_filters, const bool use_reorder_data = false) { - diskann::cout << "Search parameters: #threads: " << num_threads << ", "; + std::cout << "Search parameters: #threads: " << num_threads << ", 
"; if (beamwidth <= 0) - diskann::cout << "beamwidth to be optimized for each L value" << std::flush; + std::cout << "beamwidth to be optimized for each L value" << std::flush; else - diskann::cout << " beamwidth: " << beamwidth << std::flush; + std::cout << " beamwidth: " << beamwidth << std::flush; if (search_io_limit == std::numeric_limits::max()) - diskann::cout << "." << std::endl; + std::cout << "." << std::endl; else - diskann::cout << ", io_limit: " << search_io_limit << "." << std::endl; + std::cout << ", io_limit: " << search_io_limit << "." << std::endl; std::string warmup_query_file = index_path_prefix + "_sample_data.bin"; @@ -92,7 +92,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre diskann::load_truthset(gt_file, gt_ids, gt_dists, gt_num, gt_dim); if (gt_num != query_num) { - diskann::cout << "Error. Mismatch in number of queries and ground truth data" << std::endl; + std::cout << "Error. Mismatch in number of queries and ground truth data" << std::endl; } calc_recall_flag = true; } @@ -119,7 +119,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre } // cache bfs levels std::vector node_list; - diskann::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; + std::cout << "Caching " << num_nodes_to_cache << " BFS nodes around medoid(s)" << std::endl; //_pFlashIndex->cache_bfs_levels(num_nodes_to_cache, node_list); if (num_nodes_to_cache > 0) _pFlashIndex->generate_cache_list_from_sample_queries(warmup_query_file, 15, 6, num_nodes_to_cache, num_threads, @@ -158,7 +158,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre } } } - diskann::cout << "Warming up index... " << std::flush; + std::cout << "Warming up index... 
" << std::flush; std::vector warmup_result_ids_64(warmup_num, 0); std::vector warmup_result_dists(warmup_num, 0); @@ -169,23 +169,23 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre warmup_result_ids_64.data() + (i * 1), warmup_result_dists.data() + (i * 1), 4); } - diskann::cout << "..done" << std::endl; + std::cout << "..done" << std::endl; } - diskann::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - diskann::cout.precision(2); + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(2); std::string recall_string = "Recall@" + std::to_string(recall_at); - diskann::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) + std::cout << std::setw(6) << "L" << std::setw(12) << "Beamwidth" << std::setw(16) << "QPS" << std::setw(16) << "Mean Latency" << std::setw(16) << "99.9 Latency" << std::setw(16) << "Mean IOs" << std::setw(16) << "CPU (s)"; if (calc_recall_flag) { - diskann::cout << std::setw(16) << recall_string << std::endl; + std::cout << std::setw(16) << recall_string << std::endl; } else - diskann::cout << std::endl; - diskann::cout << "===============================================================" + std::cout << std::endl; + std::cout << "===============================================================" "=======================================================" << std::endl; @@ -202,13 +202,13 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre if (L < recall_at) { - diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + std::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; continue; } if (beamwidth <= 0) { - diskann::cout << "Tuning beamwidth.." << std::endl; + std::cout << "Tuning beamwidth.." 
<< std::endl; optimized_beamwidth = optimize_beamwidth(_pFlashIndex, warmup, warmup_num, warmup_aligned_dim, L, optimized_beamwidth); } @@ -277,19 +277,19 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre best_recall = std::max(recall, best_recall); } - diskann::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps + std::cout << std::setw(6) << L << std::setw(12) << optimized_beamwidth << std::setw(16) << qps << std::setw(16) << mean_latency << std::setw(16) << latency_999 << std::setw(16) << mean_ios << std::setw(16) << mean_cpuus; if (calc_recall_flag) { - diskann::cout << std::setw(16) << recall << std::endl; + std::cout << std::setw(16) << recall << std::endl; } else - diskann::cout << std::endl; + std::cout << std::endl; delete[] stats; } - diskann::cout << "Done searching. Now saving results " << std::endl; + std::cout << "Done searching. Now saving results " << std::endl; uint64_t test_id = 0; for (auto L : Lvec) { @@ -475,7 +475,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index search failed." << std::endl; + std::cerr << "Index search failed." << std::endl; return -1; } } \ No newline at end of file diff --git a/apps/search_memory_index.cpp b/apps/search_memory_index.cpp index ca3045331..3703a51c6 100644 --- a/apps/search_memory_index.cpp +++ b/apps/search_memory_index.cpp @@ -51,7 +51,7 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, } else { - diskann::cout << " Truthset file " << truthset_file << " not found. Not computing recall." << std::endl; + std::cout << " Truthset file " << truthset_file << " not found. Not computing recall." 
<< std::endl; } bool filtered_search = false; @@ -147,7 +147,7 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, uint32_t L = Lvec[test_id]; if (L < recall_at) { - diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + std::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; continue; } @@ -244,7 +244,7 @@ int search_memory_index(diskann::Metric &metric, const std::string &index_path, { if (L < recall_at) { - diskann::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; + std::cout << "Ignoring search with L:" << L << " since it's smaller than K:" << recall_at << std::endl; continue; } std::string cur_result_path_prefix = result_path_prefix + "_" + std::to_string(L); @@ -441,7 +441,7 @@ int main(int argc, char **argv) catch (std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index search failed." << std::endl; + std::cerr << "Index search failed." << std::endl; return -1; } } diff --git a/apps/test_streaming_scenario.cpp b/apps/test_streaming_scenario.cpp index c48c74843..89ac46d66 100644 --- a/apps/test_streaming_scenario.cpp +++ b/apps/test_streaming_scenario.cpp @@ -132,13 +132,13 @@ void delete_and_consolidate(diskann::AbstractIndex &index, diskann::IndexWritePa int wait_time = 5; if (report._status == diskann::consolidation_report::status_code::LOCK_FAIL) { - diskann::cerr << "Unable to acquire consolidate delete lock after " + std::cerr << "Unable to acquire consolidate delete lock after " << "deleting points " << start << " to " << end << ". Will retry in " << wait_time << "seconds." << std::endl; } else if (report._status == diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR) { - diskann::cerr << "Inconsistent counts in data structure. " + std::cerr << "Inconsistent counts in data structure. 
" << "Will retry in " << wait_time << "seconds." << std::endl; } else @@ -196,7 +196,7 @@ void build_incremental_index(const std::string &data_path, const uint32_t L, con size_t num_points; diskann::get_bin_metadata(data_path, num_points, dim); - diskann::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" + std::cout << "metadata: file " << data_path << " has " << num_points << " points in " << dim << " dims" << std::endl; aligned_dim = ROUND_UP(dim, 8); diff --git a/apps/utils/compute_groundtruth.cpp b/apps/utils/compute_groundtruth.cpp index f33a26b84..84970131e 100644 --- a/apps/utils/compute_groundtruth.cpp +++ b/apps/utils/compute_groundtruth.cpp @@ -442,7 +442,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s { size_t read_blk_size = 64 * 1024 * 1024; cached_ifstream reader(bin_file, read_blk_size); - diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + std::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; size_t actual_file_size = reader.get_file_size(); int npts_i32, dim_i32; @@ -451,7 +451,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s npts = (uint32_t)npts_i32; dim = (uint32_t)dim_i32; - diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... 
" << std::endl; int truthset_type = -1; // 1 means truthset has ids and distances, 2 means // only ids, -1 is error @@ -473,7 +473,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s "followed by npts*ngt distance values; actual size: " << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " << expected_file_size_just_ids; - diskann::cout << stream.str(); + std::cout << stream.str(); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -567,7 +567,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Compute GT failed." << std::endl; + std::cerr << "Compute GT failed." << std::endl; return -1; } } diff --git a/apps/utils/compute_groundtruth_for_filters.cpp b/apps/utils/compute_groundtruth_for_filters.cpp index 5be7135e1..fd98d31eb 100644 --- a/apps/utils/compute_groundtruth_for_filters.cpp +++ b/apps/utils/compute_groundtruth_for_filters.cpp @@ -597,7 +597,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s { size_t read_blk_size = 64 * 1024 * 1024; cached_ifstream reader(bin_file, read_blk_size); - diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + std::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; size_t actual_file_size = reader.get_file_size(); int npts_i32, dim_i32; @@ -606,7 +606,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s npts = (uint32_t)npts_i32; dim = (uint32_t)dim_i32; - diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... 
" << std::endl; int truthset_type = -1; // 1 means truthset has ids and distances, 2 means // only ids, -1 is error @@ -628,7 +628,7 @@ void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&dists, s "followed by npts*ngt distance values; actual size: " << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " << expected_file_size_just_ids; - diskann::cout << stream.str(); + std::cout << stream.str(); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -754,7 +754,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Compute GT failed." << std::endl; + std::cerr << "Compute GT failed." << std::endl; return -1; } } @@ -837,7 +837,7 @@ int main(int argc, char **argv) } else { - diskann::cerr << "Invalid data type" << std::endl; + std::cerr << "Invalid data type" << std::endl; return -1; } @@ -861,7 +861,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Compute GT failed." << std::endl; + std::cerr << "Compute GT failed." << std::endl; return -1; } diff --git a/apps/utils/count_bfs_levels.cpp b/apps/utils/count_bfs_levels.cpp index ddc4eaf0b..174092a77 100644 --- a/apps/utils/count_bfs_levels.cpp +++ b/apps/utils/count_bfs_levels.cpp @@ -75,7 +75,7 @@ int main(int argc, char **argv) catch (std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index BFS failed." << std::endl; + std::cerr << "Index BFS failed." << std::endl; return -1; } } diff --git a/apps/utils/rand_data_gen.cpp b/apps/utils/rand_data_gen.cpp index a6f9305c8..f2001e6e4 100644 --- a/apps/utils/rand_data_gen.cpp +++ b/apps/utils/rand_data_gen.cpp @@ -201,7 +201,7 @@ int main(int argc, char **argv) catch (const std::exception &e) { std::cout << std::string(e.what()) << std::endl; - diskann::cerr << "Index build failed." 
<< std::endl; + std::cerr << "Index build failed." << std::endl; return -1; } diff --git a/include/cached_io.h b/include/cached_io.h index daef2f2f7..7347f8ce1 100644 --- a/include/cached_io.h +++ b/include/cached_io.h @@ -7,7 +7,6 @@ #include #include -#include "logger.h" #include "ann_exception.h" // sequential cached reads @@ -43,7 +42,7 @@ class cached_ifstream this->cache_size = cacheSize; cache_buf = new char[cacheSize]; reader.read(cache_buf, cacheSize); - diskann::cout << "Opened: " << filename.c_str() << ", size: " << fsize << ", cache_size: " << cacheSize + std::cout << "Opened: " << filename.c_str() << ", size: " << fsize << ", cache_size: " << cacheSize << std::endl; } catch (std::system_error &e) @@ -78,7 +77,7 @@ class cached_ifstream stream << "Reading beyond end of file" << std::endl; stream << "n_bytes: " << n_bytes << " cached_bytes: " << cached_bytes << " fsize: " << fsize << " current pos:" << reader.tellg() << std::endl; - diskann::cout << stream.str() << std::endl; + std::cout << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } memcpy(read_buf, cache_buf + cur_off, cached_bytes); @@ -126,7 +125,7 @@ class cached_ofstream assert(writer.is_open()); assert(cache_size > 0); cache_buf = new char[cache_size]; - diskann::cout << "Opened: " << filename.c_str() << ", cache_size: " << cache_size << std::endl; + std::cout << "Opened: " << filename.c_str() << ", cache_size: " << cache_size << std::endl; } catch (std::system_error &e) { @@ -155,7 +154,7 @@ class cached_ofstream if (writer.is_open()) writer.close(); - diskann::cout << "Finished writing " << fsize << "B" << std::endl; + std::cout << "Finished writing " << fsize << "B" << std::endl; } size_t get_file_size() diff --git a/include/concurrent_queue.h b/include/concurrent_queue.h index 1e57bbf0f..839a6eeb5 100644 --- a/include/concurrent_queue.h +++ b/include/concurrent_queue.h @@ -88,7 +88,7 @@ template class ConcurrentQueue { T ret = 
this->q.front(); this->q.pop(); - // diskann::cout << "thread_id: " << std::this_thread::get_id() << + // std::cout << "thread_id: " << std::this_thread::get_id() << // ", ctx: " // << ret.ctx << "\n"; lk.unlock(); diff --git a/include/filter_utils.h b/include/filter_utils.h index df1970be4..93e119e28 100644 --- a/include/filter_utils.h +++ b/include/filter_utils.h @@ -195,7 +195,7 @@ inline std::vector loadTags(const std::string &tags_file, const std::s diskann::load_bin(tags_file, tag_data, tag_file_npts, tag_file_ndims); if (tag_file_ndims != 1) { - diskann::cerr << "tags file error" << std::endl; + std::cerr << "tags file error" << std::endl; throw diskann::ANNException("tag file error", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -204,7 +204,7 @@ inline std::vector loadTags(const std::string &tags_file, const std::s diskann::get_bin_metadata(base_file, base_file_npts, base_file_ndims); if (base_file_npts != tag_file_npts) { - diskann::cerr << "point num in tags file mismatch" << std::endl; + std::cerr << "point num in tags file mismatch" << std::endl; throw diskann::ANNException("point num in tags file mismatch", -1, __FUNCSIG__, __FILE__, __LINE__); } diff --git a/include/logger.h b/include/logger.h deleted file mode 100644 index 1a1b79e71..000000000 --- a/include/logger.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. 
-#pragma once - -#include -#include -#include "windows_customizations.h" - -#ifdef EXEC_ENV_OLS -#ifndef ENABLE_CUSTOM_LOGGER -#define ENABLE_CUSTOM_LOGGER -#endif // !ENABLE_CUSTOM_LOGGER -#endif // EXEC_ENV_OLS - -namespace diskann -{ -DISKANN_DLLEXPORT extern std::basic_ostream cout; -DISKANN_DLLEXPORT extern std::basic_ostream cerr; - -enum class DISKANN_DLLEXPORT LogLevel -{ - LL_Info = 0, - LL_Error, - LL_Count -}; - -#ifdef ENABLE_CUSTOM_LOGGER -DISKANN_DLLEXPORT void SetCustomLogger(std::function logger); -#endif -} // namespace diskann diff --git a/include/logger_impl.h b/include/logger_impl.h deleted file mode 100644 index 510c5aa08..000000000 --- a/include/logger_impl.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -#pragma once - -#include -#include - -#include "ann_exception.h" -#include "logger.h" - -namespace diskann -{ -class ANNStreamBuf : public std::basic_streambuf -{ - public: - DISKANN_DLLEXPORT explicit ANNStreamBuf(FILE *fp); - DISKANN_DLLEXPORT ~ANNStreamBuf(); - - DISKANN_DLLEXPORT bool is_open() const - { - return true; // because stdout and stderr are always open. - } - DISKANN_DLLEXPORT void close(); - DISKANN_DLLEXPORT virtual int underflow(); - DISKANN_DLLEXPORT virtual int overflow(int c); - DISKANN_DLLEXPORT virtual int sync(); - - private: - FILE *_fp; - char *_buf; - int _bufIndex; - std::mutex _mutex; - LogLevel _logLevel; - - int flush(); - void logImpl(char *str, int numchars); - -// Why the two buffer-sizes? If we are running normally, we are basically -// interacting with a character output system, so we short-circuit the -// output process by keeping an empty buffer and writing each character -// to stdout/stderr. But if we are running in OLS, we have to take all -// the text that is written to diskann::cout/diskann:cerr, consolidate it -// and push it out in one-shot, because the OLS infra does not give us -// character based output. 
Therefore, we use a larger buffer that is large -// enough to store the longest message, and continuously add characters -// to it. When the calling code outputs a std::endl or std::flush, sync() -// will be called and will output a log level, component name, and the text -// that has been collected. (sync() is also called if the buffer is full, so -// overflows/missing text are not a concern). -// This implies calling code _must_ either print std::endl or std::flush -// to ensure that the message is written immediately. -#ifdef ENABLE_CUSTOM_LOGGER - static const int BUFFER_SIZE = 1024; -#else - // Allocating an arbitrarily small buffer here because the overflow() and - // other function implementations push the BUFFER_SIZE chars into the - // buffer before flushing to fwrite. - static const int BUFFER_SIZE = 4; -#endif - - ANNStreamBuf(const ANNStreamBuf &); - ANNStreamBuf &operator=(const ANNStreamBuf &); -}; -} // namespace diskann diff --git a/include/utils.h b/include/utils.h index 58bb52a3b..3fef59776 100644 --- a/include/utils.h +++ b/include/utils.h @@ -21,7 +21,6 @@ typedef int FileHandle; #endif #include "distance.h" -#include "logger.h" #include "cached_io.h" #include "ann_exception.h" #include "windows_customizations.h" @@ -76,13 +75,13 @@ inline bool file_exists(const std::string &name, bool dirCheck = false) switch (errno) { case EINVAL: - diskann::cout << "Invalid argument passed to stat()" << std::endl; + std::cout << "Invalid argument passed to stat()" << std::endl; break; case ENOENT: // file is not existing, not an issue, so we won't cout anything. 
break; default: - diskann::cout << "Unexpected error in stat():" << errno << std::endl; + std::cout << "Unexpected error in stat():" << errno << std::endl; break; } return false; @@ -111,7 +110,7 @@ inline void open_file_to_write(std::ofstream &writer, const std::string &filenam auto ret = std::string(strerror_r(errno, buff, 1024)); #endif auto message = std::string("Failed to open file") + filename + " for write because " + buff + ", ret=" + ret; - diskann::cerr << message << std::endl; + std::cerr << message << std::endl; throw diskann::ANNException(message, -1); } } @@ -127,7 +126,7 @@ inline size_t get_file_size(const std::string &fname) } else { - diskann::cerr << "Could not open file: " << fname << std::endl; + std::cerr << "Could not open file: " << fname << std::endl; return 0; } } @@ -139,7 +138,7 @@ inline int delete_file(const std::string &fileName) auto rc = ::remove(fileName.c_str()); if (rc != 0) { - diskann::cerr << "Could not delete file: " << fileName + std::cerr << "Could not delete file: " << fileName << " even though it exists. This might indicate a permissions " "issue. " "If you see this message, please contact the diskann team." @@ -210,7 +209,7 @@ static const size_t MAX_SIZE_OF_STREAMBUF = 2LL * 1024 * 1024 * 1024; inline void print_error_and_terminate(std::stringstream &error_stream) { - diskann::cerr << error_stream.str() << std::endl; + std::cerr << error_stream.str() << std::endl; throw diskann::ANNException(error_stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -249,7 +248,7 @@ inline void realloc_aligned(void **ptr, size_t size, size_t align) #ifdef _WINDOWS *ptr = ::_aligned_realloc(*ptr, size, align); #else - diskann::cerr << "No aligned realloc on GCC. Must malloc and mem_align, " + std::cerr << "No aligned realloc on GCC. Must malloc and mem_align, " "left it out for now." 
<< std::endl; #endif @@ -260,7 +259,7 @@ inline void realloc_aligned(void **ptr, size_t size, size_t align) inline void check_stop(std::string arnd) { int brnd; - diskann::cout << arnd << std::endl; + std::cout << arnd << std::endl; std::cin >> brnd; } @@ -316,7 +315,7 @@ inline void get_bin_metadata_impl(std::basic_istream &reader, size_t &nrow inline void get_bin_metadata(MemoryMappedFiles &files, const std::string &bin_file, size_t &nrows, size_t &ncols, size_t offset = 0) { - diskann::cout << "Getting metadata for file: " << bin_file << std::endl; + std::cout << "Getting metadata for file: " << bin_file << std::endl; auto fc = files.getContent(bin_file); // auto cb = ContentBuf((char*) fc._content, fc._size); // std::basic_istream reader(&cb); @@ -394,7 +393,7 @@ template inline void load_bin(MemoryMappedFiles &files, const std::string &bin_file, T *&data, size_t &npts, size_t &dim, size_t offset = 0) { - diskann::cout << "Reading bin file " << bin_file.c_str() << " at offset: " << offset << "..." << std::endl; + std::cout << "Reading bin file " << bin_file.c_str() << " at offset: " << offset << "..." << std::endl; auto fc = files.getContent(bin_file); uint32_t t_npts, t_dim; @@ -429,13 +428,13 @@ template DISKANN_DLLEXPORT void read_value(AlignedFileReader &reade template inline void load_bin(const std::string &bin_file, T *&data, size_t &npts, size_t &dim, size_t offset = 0) { - diskann::cout << "Reading bin file " << bin_file.c_str() << " ..." << std::endl; + std::cout << "Reading bin file " << bin_file.c_str() << " ..." << std::endl; std::ifstream reader; reader.exceptions(std::ifstream::failbit | std::ifstream::badbit); try { - diskann::cout << "Opening bin file " << bin_file.c_str() << "... " << std::endl; + std::cout << "Opening bin file " << bin_file.c_str() << "... 
" << std::endl; reader.open(bin_file, std::ios::binary | std::ios::ate); reader.seekg(0); load_bin_impl(reader, data, npts, dim, offset); @@ -444,7 +443,7 @@ inline void load_bin(const std::string &bin_file, T *&data, size_t &npts, size_t { throw FileException(bin_file, e, __FUNCSIG__, __FILE__, __LINE__); } - diskann::cout << "done." << std::endl; + std::cout << "done." << std::endl; } inline void wait_for_keystroke() @@ -459,7 +458,7 @@ inline void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&d { size_t read_blk_size = 64 * 1024 * 1024; cached_ifstream reader(bin_file, read_blk_size); - diskann::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; + std::cout << "Reading truthset file " << bin_file.c_str() << " ..." << std::endl; size_t actual_file_size = reader.get_file_size(); int npts_i32, dim_i32; @@ -468,7 +467,7 @@ inline void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&d npts = (unsigned)npts_i32; dim = (unsigned)dim_i32; - diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; int truthset_type = -1; // 1 means truthset has ids and distances, 2 means // only ids, -1 is error @@ -490,7 +489,7 @@ inline void load_truthset(const std::string &bin_file, uint32_t *&ids, float *&d "followed by npts*ngt distance values; actual size: " << actual_file_size << ", expected: " << expected_file_size_with_dists << " or " << expected_file_size_just_ids; - diskann::cout << stream.str(); + std::cout << stream.str(); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -509,7 +508,7 @@ inline void prune_truthset_for_range(const std::string &bin_file, float range, { size_t read_blk_size = 64 * 1024 * 1024; cached_ifstream reader(bin_file, read_blk_size); - diskann::cout << "Reading truthset file " << bin_file.c_str() << "... 
" << std::endl; + std::cout << "Reading truthset file " << bin_file.c_str() << "... " << std::endl; size_t actual_file_size = reader.get_file_size(); int npts_i32, dim_i32; @@ -520,7 +519,7 @@ inline void prune_truthset_for_range(const std::string &bin_file, float range, uint32_t *ids; float *dists; - diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << "... " << std::endl; int truthset_type = -1; // 1 means truthset has ids and distances, 2 means // only ids, -1 is error @@ -536,7 +535,7 @@ inline void prune_truthset_for_range(const std::string &bin_file, float range, "npts followed by ngt followed by npts*ngt ids and optionally " "followed by npts*ngt distance values; actual size: " << actual_file_size << ", expected: " << expected_file_size_with_dists; - diskann::cout << stream.str(); + std::cout << stream.str(); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -575,7 +574,7 @@ inline void load_range_truthset(const std::string &bin_file, std::vector &reader, size_t actua stream << "Error. File size mismatch. Actual size is " << actual_file_size << " while expected size is " << expected_actual_file_size << " npts = " << npts << " dim = " << dim << " size of = " << sizeof(T) << std::endl; - diskann::cout << stream.str() << std::endl; + std::cout << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } rounded_dim = ROUND_UP(dim, 8); - diskann::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << ", aligned_dim = " << rounded_dim << "... " + std::cout << "Metadata: #pts = " << npts << ", #dims = " << dim << ", aligned_dim = " << rounded_dim << "... " << std::flush; size_t allocSize = npts * rounded_dim * sizeof(T); - diskann::cout << "allocating aligned memory of " << allocSize << " bytes... 
" << std::flush; + std::cout << "allocating aligned memory of " << allocSize << " bytes... " << std::flush; alloc_aligned(((void **)&data), allocSize, 8 * sizeof(T)); - diskann::cout << "done. Copying data to mem_aligned buffer..." << std::flush; + std::cout << "done. Copying data to mem_aligned buffer..." << std::flush; for (size_t i = 0; i < npts; i++) { reader.read((char *)(data + i * rounded_dim), dim * sizeof(T)); memset(data + i * rounded_dim + dim, 0, (rounded_dim - dim) * sizeof(T)); } - diskann::cout << " done." << std::endl; + std::cout << " done." << std::endl; } #ifdef EXEC_ENV_OLS @@ -765,7 +764,7 @@ inline void load_aligned_bin(MemoryMappedFiles &files, const std::string &bin_fi { try { - diskann::cout << "Opening bin file " << bin_file << " ..." << std::flush; + std::cout << "Opening bin file " << bin_file << " ..." << std::flush; FileContent fc = files.getContent(bin_file); ContentBuf buf((char *)fc._content, fc._size); std::basic_istream reader(&buf); @@ -788,7 +787,7 @@ inline void load_aligned_bin(const std::string &bin_file, T *&data, size_t &npts try { - diskann::cout << "Reading (with alignment) bin file " << bin_file << " ..." << std::flush; + std::cout << "Reading (with alignment) bin file " << bin_file << " ..." << std::flush; reader.open(bin_file, std::ios::binary | std::ios::ate); uint64_t fsize = reader.tellg(); @@ -939,7 +938,7 @@ inline void copy_aligned_data_from_file(const char *bin_file, T *&data, size_t & { if (data == nullptr) { - diskann::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." + std::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." 
<< std::endl; throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file function", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -1020,7 +1019,7 @@ inline bool validate_index_file_size(std::ifstream &in) in.seekg(0, in.beg); if (actual_file_size != expected_file_size) { - diskann::cerr << "Index file size error. Expected size (metadata): " << expected_file_size + std::cerr << "Index file size error. Expected size (metadata): " << expected_file_size << ", actual file size : " << actual_file_size << "." << std::endl; return false; } @@ -1102,14 +1101,14 @@ inline void clean_up_artifacts(tsl::robin_set paths_to_clean, tsl:: { std::string curr_path_to_clean(path + "_" + suffix); if (std::remove(curr_path_to_clean.c_str()) != 0) - diskann::cout << "Warning: Unable to remove file :" << curr_path_to_clean << std::endl; + std::cout << "Warning: Unable to remove file :" << curr_path_to_clean << std::endl; } } - diskann::cout << "Cleaned all artifacts" << std::endl; + std::cout << "Cleaned all artifacts" << std::endl; } catch (const std::exception &e) { - diskann::cout << "Warning: Unable to clean all artifacts " << e.what() << std::endl; + std::cout << "Warning: Unable to clean all artifacts " << e.what() << std::endl; } } @@ -1190,7 +1189,7 @@ inline void printProcessMemory(const char *message) PROCESS_MEMORY_COUNTERS counters; HANDLE h = GetCurrentProcess(); GetProcessMemoryInfo(h, &counters, sizeof(counters)); - diskann::cout << message + std::cout << message << " [Peaking Working Set size: " << counters.PeakWorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) << "GB Working set size: " << counters.WorkingSetSize * 1.0 / (1024.0 * 1024 * 1024) << "GB Private bytes " << counters.PagefileUsage * 1.0 / (1024 * 1024 * 1024) << "GB]" << std::endl; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2206a01f7..314740fa4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,7 +13,7 @@ else() linux_aligned_file_reader.cpp math_utils.cpp 
natural_number_map.cpp in_mem_data_store.cpp in_mem_graph_store.cpp natural_number_set.cpp memory_mapper.cpp partition.cpp pq.cpp - pq_flash_index.cpp scratch.cpp logger.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp) + pq_flash_index.cpp scratch.cpp utils.cpp filter_utils.cpp index_factory.cpp abstract_index.cpp) if (RESTAPI) list(APPEND CPP_SOURCES restapi/search_wrapper.cpp restapi/server.cpp) endif() diff --git a/src/disk_utils.cpp b/src/disk_utils.cpp index aadeb6dd1..604d00fda 100644 --- a/src/disk_utils.cpp +++ b/src/disk_utils.cpp @@ -7,7 +7,6 @@ #include "gperftools/malloc_extension.h" #endif -#include "logger.h" #include "disk_utils.h" #include "cached_io.h" #include "index.h" @@ -97,7 +96,7 @@ size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, const std::vector ¶m_list) { size_t num_pq_chunks = (size_t)(std::floor)(uint64_t(final_index_ram_limit / (double)points_num)); - diskann::cout << "Calculated num_pq_chunks :" << num_pq_chunks << std::endl; + std::cout << "Calculated num_pq_chunks :" << num_pq_chunks << std::endl; if (param_list.size() >= 6) { float compress_ratio = (float)atof(param_list[5].c_str()); @@ -107,19 +106,19 @@ size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, if (chunks_by_cr > 0 && chunks_by_cr < num_pq_chunks) { - diskann::cout << "Compress ratio:" << compress_ratio << " new #pq_chunks:" << chunks_by_cr << std::endl; + std::cout << "Compress ratio:" << compress_ratio << " new #pq_chunks:" << chunks_by_cr << std::endl; num_pq_chunks = chunks_by_cr; } else { - diskann::cout << "Compress ratio: " << compress_ratio << " #new pq_chunks: " << chunks_by_cr + std::cout << "Compress ratio: " << compress_ratio << " #new pq_chunks: " << chunks_by_cr << " is either zero or greater than num_pq_chunks: " << num_pq_chunks << ". num_pq_chunks is unchanged. 
" << std::endl; } } else { - diskann::cerr << "Compression ratio: " << compress_ratio << " should be in (0,1]" << std::endl; + std::cerr << "Compression ratio: " << compress_ratio << " should be in (0,1]" << std::endl; } } @@ -127,7 +126,7 @@ size_t calculate_num_pq_chunks(double final_index_ram_limit, size_t points_num, num_pq_chunks = num_pq_chunks > dim ? dim : num_pq_chunks; num_pq_chunks = num_pq_chunks > MAX_PQ_CHUNKS ? MAX_PQ_CHUNKS : num_pq_chunks; - diskann::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." + std::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." << std::endl; return num_pq_chunks; } @@ -136,7 +135,7 @@ template T *generateRandomWarmup(uint64_t warmup_num, uint64_t warm { T *warmup = nullptr; warmup_num = 100000; - diskann::cout << "Generating random warmup file with dim " << warmup_dim << " and aligned dim " + std::cout << "Generating random warmup file with dim " << warmup_dim << " and aligned dim " << warmup_aligned_dim << std::flush; diskann::alloc_aligned(((void **)&warmup), warmup_num * warmup_aligned_dim * sizeof(T), 8 * sizeof(T)); std::memset(warmup, 0, warmup_num * warmup_aligned_dim * sizeof(T)); @@ -150,7 +149,7 @@ template T *generateRandomWarmup(uint64_t warmup_num, uint64_t warm warmup[i * warmup_aligned_dim + d] = (T)dis(gen); } } - diskann::cout << "..done" << std::endl; + std::cout << "..done" << std::endl; return warmup; } @@ -165,7 +164,7 @@ T *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, u if (files.fileExists(cache_warmup_file)) { diskann::load_aligned_bin(files, cache_warmup_file, warmup, warmup_num, file_dim, file_aligned_dim); - diskann::cout << "In the warmup file: " << cache_warmup_file << " File dim: " << file_dim + std::cout << "In the warmup file: " << cache_warmup_file << " File dim: " << file_dim << " File aligned dim: " << file_aligned_dim << " Expected dim: " << warmup_dim << " 
Expected aligned dim: " << warmup_aligned_dim << std::endl; @@ -175,7 +174,7 @@ T *load_warmup(MemoryMappedFiles &files, const std::string &cache_warmup_file, u stream << "Mismatched dimensions in sample file. file_dim = " << file_dim << " file_aligned_dim: " << file_aligned_dim << " index_dim: " << warmup_dim << " index_aligned_dim: " << warmup_aligned_dim << std::endl; - diskann::cerr << stream.str(); + std::cerr << stream.str(); throw diskann::ANNException(stream.str(), -1); } } @@ -264,14 +263,14 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf nelems += idmap.size(); } nnodes++; - diskann::cout << "# nodes: " << nnodes << ", max. degree: " << max_degree << std::endl; + std::cout << "# nodes: " << nnodes << ", max. degree: " << max_degree << std::endl; // compute inverse map: node -> shards std::vector> node_shard; node_shard.reserve(nelems); for (size_t shard = 0; shard < nshards; shard++) { - diskann::cout << "Creating inverse map -- shard #" << shard << std::endl; + std::cout << "Creating inverse map -- shard #" << shard << std::endl; for (size_t idx = 0; idx < idmaps[shard].size(); idx++) { size_t node_id = idmaps[shard][idx]; @@ -281,7 +280,7 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf std::sort(node_shard.begin(), node_shard.end(), [](const auto &left, const auto &right) { return left.first < right.first || (left.first == right.first && left.second < right.second); }); - diskann::cout << "Finished computing node -> shards map" << std::endl; + std::cout << "Finished computing node -> shards map" << std::endl; // will merge all the labels to medoids files of each shard into one // combined file @@ -370,7 +369,7 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf max_input_width = input_width > max_input_width ? 
input_width : max_input_width; } - diskann::cout << "Max input width: " << max_input_width << ", output width: " << output_width << std::endl; + std::cout << "Max input width: " << max_input_width << ", output width: " << output_width << std::endl; merged_vamana_writer.write((char *)&output_width, sizeof(uint32_t)); std::ofstream medoid_writer(medoids_file.c_str(), std::ios::binary); @@ -400,7 +399,7 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf merged_vamana_writer.write((char *)&merged_index_frozen, sizeof(uint64_t)); medoid_writer.close(); - diskann::cout << "Starting merge" << std::endl; + std::cout << "Starting merge" << std::endl; // Gopal. random_shuffle() is deprecated. std::random_device rng; @@ -426,7 +425,7 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf merged_index_size += (sizeof(uint32_t) + nnbrs * sizeof(uint32_t)); if (cur_id % 499999 == 1) { - diskann::cout << "." << std::flush; + std::cout << "." 
<< std::flush; } cur_id = node_id; nnbrs = 0; @@ -439,7 +438,7 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf if (shard_nnbrs == 0) { - diskann::cout << "WARNING: shard #" << shard_id << ", node_id " << node_id << " has 0 nbrs" << std::endl; + std::cout << "WARNING: shard #" << shard_id << ", node_id " << node_id << " has 0 nbrs" << std::endl; } std::vector shard_nhood(shard_nnbrs); @@ -470,12 +469,12 @@ int merge_shards(const std::string &vamana_prefix, const std::string &vamana_suf nhood_set[p] = 0; final_nhood.clear(); - diskann::cout << "Expected size: " << merged_index_size << std::endl; + std::cout << "Expected size: " << merged_index_size << std::endl; merged_vamana_writer.reset(); merged_vamana_writer.write((char *)&merged_index_size, sizeof(uint64_t)); - diskann::cout << "Finished merge" << std::endl; + std::cout << "Finished merge" << std::endl; return 0; } @@ -535,12 +534,12 @@ void breakup_dense_points(const std::string data_file, const std::string labels_ point_cnt++; } } - diskann::cout << "fraction of dense points with >= " << density << " labels = " << (float)dense_pts / (float)npts + std::cout << "fraction of dense points with >= " << density << " labels = " << (float)dense_pts / (float)npts << std::endl; if (labels_per_point.size() != 0) { - diskann::cout << labels_per_point.size() << " is the new number of points" << std::endl; + std::cout << labels_per_point.size() << " is the new number of points" << std::endl; std::ofstream label_writer(out_labels_file); assert(label_writer.is_open()); for (uint32_t i = 0; i < labels_per_point.size(); i++) @@ -558,7 +557,7 @@ void breakup_dense_points(const std::string data_file, const std::string labels_ if (dummy_pt_ids.size() != 0) { - diskann::cout << dummy_pt_ids.size() << " is the number of dummy points created" << std::endl; + std::cout << dummy_pt_ids.size() << " is the number of dummy points created" << std::endl; data = (T *)std::realloc((void *)data, 
labels_per_point.size() * ndims * sizeof(T)); std::ofstream dummy_writer(out_metadata_file); assert(dummy_writer.is_open()); @@ -577,7 +576,7 @@ void extract_shard_labels(const std::string &in_label_file, const std::string &s const std::string &shard_label_file) { // assumes ith row is for ith // point in labels file - diskann::cout << "Extracting labels for shard" << std::endl; + std::cout << "Extracting labels for shard" << std::endl; uint32_t *ids = nullptr; uint64_t num_ids, tmp_dim; @@ -626,7 +625,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr // TODO: Make this honest when there is filter support if (full_index_ram < ram_budget * 1024 * 1024 * 1024) { - diskann::cout << "Full index fits in RAM budget, should consume at most " + std::cout << "Full index fits in RAM budget, should consume at most " << full_index_ram / (1024 * 1024 * 1024) << "GiBs, so building in one shot" << std::endl; diskann::IndexWriteParameters paras = diskann::IndexWriteParametersBuilder(L, R) @@ -673,7 +672,7 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr Timer timer; int num_parts = partition_with_ram_budget(base_file, sampling_rate, ram_budget, 2 * R / 3, merged_index_prefix, 2); - diskann::cout << timer.elapsed_seconds_for_step("partitioning data") << std::endl; + std::cout << timer.elapsed_seconds_for_step("partitioning data") << std::endl; std::string cur_centroid_filepath = merged_index_prefix + "_centroids.bin"; std::rename(cur_centroid_filepath.c_str(), centroids_file.c_str()); @@ -726,13 +725,13 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr std::remove(shard_base_file.c_str()); } - diskann::cout << timer.elapsed_seconds_for_step("building indices on shards") << std::endl; + std::cout << timer.elapsed_seconds_for_step("building indices on shards") << std::endl; timer.reset(); diskann::merge_shards(merged_index_prefix + "_subshard-", "_mem.index", merged_index_prefix + 
"_subshard-", "_ids_uint32.bin", num_parts, R, mem_index_path, medoids_file, use_filters, labels_to_medoids_file); - diskann::cout << timer.elapsed_seconds_for_step("merging indices") << std::endl; + std::cout << timer.elapsed_seconds_for_step("merging indices") << std::endl; // delete tempFiles for (int p = 0; p < num_parts; p++) @@ -865,7 +864,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index // create cached reader + writer size_t actual_file_size = get_file_size(mem_index_file); - diskann::cout << "Vamana index file size=" << actual_file_size << std::endl; + std::cout << "Vamana index file size=" << actual_file_size << std::endl; std::ifstream vamana_reader(mem_index_file, std::ios::binary); cached_ofstream diskann_writer(output_file, write_blk_size); @@ -897,9 +896,9 @@ void create_disk_layout(const std::string base_file, const std::string mem_index max_node_len = (((uint64_t)width_u32 + 1) * sizeof(uint32_t)) + (ndims_64 * sizeof(T)); nnodes_per_sector = SECTOR_LEN / max_node_len; - diskann::cout << "medoid: " << medoid << "B" << std::endl; - diskann::cout << "max_node_len: " << max_node_len << "B" << std::endl; - diskann::cout << "nnodes_per_sector: " << nnodes_per_sector << "B" << std::endl; + std::cout << "medoid: " << medoid << "B" << std::endl; + std::cout << "max_node_len: " << max_node_len << "B" << std::endl; + std::cout << "nnodes_per_sector: " << nnodes_per_sector << "B" << std::endl; // SECTOR_LEN buffer for each sector std::unique_ptr sector_buf = std::make_unique(SECTOR_LEN); @@ -939,13 +938,13 @@ void create_disk_layout(const std::string base_file, const std::string mem_index diskann_writer.write(sector_buf.get(), SECTOR_LEN); std::unique_ptr cur_node_coords = std::make_unique(ndims_64); - diskann::cout << "# sectors: " << n_sectors << std::endl; + std::cout << "# sectors: " << n_sectors << std::endl; uint64_t cur_node_id = 0; for (uint64_t sector = 0; sector < n_sectors; sector++) { if (sector % 100000 == 0) 
{ - diskann::cout << "Sector #" << sector << "written" << std::endl; + std::cout << "Sector #" << sector << "written" << std::endl; } memset(sector_buf.get(), 0, SECTOR_LEN); for (uint64_t sector_node_id = 0; sector_node_id < nnodes_per_sector && cur_node_id < npts_64; sector_node_id++) @@ -989,7 +988,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index } if (append_reorder_data) { - diskann::cout << "Index written. Appending reorder data..." << std::endl; + std::cout << "Index written. Appending reorder data..." << std::endl; auto vec_len = ndims_reorder_file * sizeof(float); std::unique_ptr vec_buf = std::make_unique(vec_len); @@ -998,7 +997,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index { if (sector % 100000 == 0) { - diskann::cout << "Reorder data Sector #" << sector << "written" << std::endl; + std::cout << "Reorder data Sector #" << sector << "written" << std::endl; } memset(sector_buf.get(), 0, SECTOR_LEN); @@ -1018,7 +1017,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index } diskann_writer.close(); diskann::save_bin(output_file, output_file_meta.data(), output_file_meta.size(), 1, 0); - diskann::cout << "Output disk index file written to " << output_file << std::endl; + std::cout << "Output disk index file written to " << output_file << std::endl; } template @@ -1037,7 +1036,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const } if (param_list.size() < 5 || param_list.size() > 9) { - diskann::cout << "Correct usage of parameters is R (max degree)\n" + std::cout << "Correct usage of parameters is R (max degree)\n" "L (indexing list size, better if >= R)\n" "B (RAM limit of final index in GB)\n" "M (memory limit while indexing)\n" @@ -1136,7 +1135,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const float max_norm_of_base = diskann::prepare_base_for_inner_products(base_file, prepped_base); 
std::string norm_file = disk_index_path + "_max_base_norm.bin"; diskann::save_bin(norm_file, &max_norm_of_base, 1, 1); - diskann::cout << timer.elapsed_seconds_for_step("preprocessing data for inner product") << std::endl; + std::cout << timer.elapsed_seconds_for_step("preprocessing data for inner product") << std::endl; } uint32_t R = (uint32_t)atoi(param_list[0].c_str()); @@ -1164,7 +1163,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const mkl_set_num_threads(num_threads); } - diskann::cout << "Starting index build: R=" << R << " L=" << L << " Query RAM budget: " << final_index_ram_limit + std::cout << "Starting index build: R=" << R << " L=" << L << " Query RAM budget: " << final_index_ram_limit << " Indexing ram budget: " << indexing_ram_budget << " T: " << num_threads << std::endl; auto s = std::chrono::high_resolution_clock::now(); @@ -1216,12 +1215,12 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const num_pq_chunks = atoi(param_list[8].c_str()); } - diskann::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." + std::cout << "Compressing " << dim << "-dimensional data into " << num_pq_chunks << " bytes per vector." << std::endl; generate_quantized_data(data_file_to_use, pq_pivots_path, pq_compressed_vectors_path, compareMetric, p_val, num_pq_chunks, use_opq, codebook_prefix); - diskann::cout << timer.elapsed_seconds_for_step("generating quantized data") << std::endl; + std::cout << timer.elapsed_seconds_for_step("generating quantized data") << std::endl; // Gopal. Splitting diskann_dll into separate DLLs for search and build. // This code should only be available in the "build" DLL. 
@@ -1234,7 +1233,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const indexing_ram_budget, mem_index_path, medoids_path, centroids_path, build_pq_bytes, use_opq, num_threads, use_filters, labels_file_to_use, labels_to_medoids_path, universal_label, Lf); - diskann::cout << timer.elapsed_seconds_for_step("building merged vamana index") << std::endl; + std::cout << timer.elapsed_seconds_for_step("building merged vamana index") << std::endl; timer.reset(); if (!use_disk_pq) @@ -1249,7 +1248,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const diskann::create_disk_layout(disk_pq_compressed_vectors_path, mem_index_path, disk_index_path, data_file_to_use.c_str()); } - diskann::cout << timer.elapsed_seconds_for_step("generating disk layout") << std::endl; + std::cout << timer.elapsed_seconds_for_step("generating disk layout") << std::endl; double ten_percent_points = std::ceil(points_num * 0.1); double num_sample_points = @@ -1276,7 +1275,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const auto e = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = e - s; - diskann::cout << "Indexing time: " << diff.count() << std::endl; + std::cout << "Indexing time: " << diff.count() << std::endl; return 0; } diff --git a/src/distance.cpp b/src/distance.cpp index 31ab9d3ff..6ab8d70d9 100644 --- a/src/distance.cpp +++ b/src/distance.cpp @@ -16,7 +16,6 @@ #include "distance.h" #include "utils.h" -#include "logger.h" #include "ann_exception.h" namespace diskann @@ -306,7 +305,7 @@ template float DistanceInnerProduct::inner_product(const T *a, c { if (!std::is_floating_point::value) { - diskann::cerr << "ERROR: Inner Product only defined for float currently." << std::endl; + std::cerr << "ERROR: Inner Product only defined for float currently." 
<< std::endl; throw diskann::ANNException("ERROR: Inner Product only defined for float currently.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -426,7 +425,7 @@ template float DistanceFastL2::norm(const T *a, uint32_t size) c { if (!std::is_floating_point::value) { - diskann::cerr << "ERROR: FastL2 only defined for float currently." << std::endl; + std::cerr << "ERROR: FastL2 only defined for float currently." << std::endl; throw diskann::ANNException("ERROR: FastL2 only defined for float currently.", -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -606,35 +605,35 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) { if (Avx2SupportedCPU) { - diskann::cout << "L2: Using AVX2 distance computation DistanceL2Float" << std::endl; + std::cout << "L2: Using AVX2 distance computation DistanceL2Float" << std::endl; return new diskann::DistanceL2Float(); } else if (AvxSupportedCPU) { - diskann::cout << "L2: AVX2 not supported. Using AVX distance computation" << std::endl; + std::cout << "L2: AVX2 not supported. Using AVX distance computation" << std::endl; return new diskann::AVXDistanceL2Float(); } else { - diskann::cout << "L2: Older CPU. Using slow distance computation" << std::endl; + std::cout << "L2: Older CPU. 
Using slow distance computation" << std::endl; return new diskann::SlowDistanceL2(); } } else if (m == diskann::Metric::COSINE) { - diskann::cout << "Cosine: Using either AVX or AVX2 implementation" << std::endl; + std::cout << "Cosine: Using either AVX or AVX2 implementation" << std::endl; return new diskann::DistanceCosineFloat(); } else if (m == diskann::Metric::INNER_PRODUCT) { - diskann::cout << "Inner product: Using AVX2 implementation " + std::cout << "Inner product: Using AVX2 implementation " "AVXDistanceInnerProductFloat" << std::endl; return new diskann::AVXDistanceInnerProductFloat(); } else if (m == diskann::Metric::FAST_L2) { - diskann::cout << "Fast_L2: Using AVX2 implementation with norm " + std::cout << "Fast_L2: Using AVX2 implementation with norm " "memoization DistanceFastL2" << std::endl; return new diskann::DistanceFastL2(); @@ -645,7 +644,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) stream << "Only L2, cosine, and inner product supported for floating " "point vectors as of now." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } } @@ -656,17 +655,17 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) { if (Avx2SupportedCPU) { - diskann::cout << "Using AVX2 distance computation DistanceL2Int8." << std::endl; + std::cout << "Using AVX2 distance computation DistanceL2Int8." << std::endl; return new diskann::DistanceL2Int8(); } else if (AvxSupportedCPU) { - diskann::cout << "AVX2 not supported. Using AVX distance computation" << std::endl; + std::cout << "AVX2 not supported. Using AVX distance computation" << std::endl; return new diskann::AVXDistanceL2Int8(); } else { - diskann::cout << "Older CPU. Using slow distance computation " + std::cout << "Older CPU. Using slow distance computation " "SlowDistanceL2Int." 
<< std::endl; return new diskann::SlowDistanceL2(); @@ -674,7 +673,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) } else if (m == diskann::Metric::COSINE) { - diskann::cout << "Using either AVX or AVX2 for Cosine similarity " + std::cout << "Using either AVX or AVX2 for Cosine similarity " "DistanceCosineInt8." << std::endl; return new diskann::DistanceCosineInt8(); @@ -683,7 +682,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) { std::stringstream stream; stream << "Only L2 and cosine supported for signed byte vectors." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } } @@ -693,7 +692,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) if (m == diskann::Metric::L2) { #ifdef _WINDOWS - diskann::cout << "WARNING: AVX/AVX2 distance function not defined for Uint8. " + std::cout << "WARNING: AVX/AVX2 distance function not defined for Uint8. " "Using " "slow version. " "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." @@ -703,7 +702,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) } else if (m == diskann::Metric::COSINE) { - diskann::cout << "AVX/AVX2 distance function not defined for Uint8. Using " + std::cout << "AVX/AVX2 distance function not defined for Uint8. Using " "slow version SlowDistanceCosineUint8() " "Contact gopalsr@microsoft.com if you need AVX/AVX2 support." << std::endl; @@ -713,7 +712,7 @@ template <> diskann::Distance *get_distance_function(diskann::Metric m) { std::stringstream stream; stream << "Only L2 and cosine supported for uint32_t byte vectors." 
<< std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } } diff --git a/src/filter_utils.cpp b/src/filter_utils.cpp index 965762d1f..cb72b581e 100644 --- a/src/filter_utils.cpp +++ b/src/filter_utils.cpp @@ -37,7 +37,7 @@ void generate_label_indices(path input_data_path, path final_index_path_prefix, // for each label, build an index on resp. points double total_indexing_time = 0.0, indexing_percentage = 0.0; std::cout.setstate(std::ios_base::failbit); - diskann::cout.setstate(std::ios_base::failbit); + std::cout.setstate(std::ios_base::failbit); for (const auto &lbl : all_labels) { path curr_label_input_data_path(input_data_path + "_" + lbl); @@ -59,7 +59,7 @@ void generate_label_indices(path input_data_path, path final_index_path_prefix, index.save(curr_label_index_path.c_str()); } std::cout.clear(); - diskann::cout.clear(); + std::cout.clear(); std::cout << "\nDone. Generated per-label indices in " << total_indexing_time << " seconds\n" << std::endl; } diff --git a/src/in_mem_data_store.cpp b/src/in_mem_data_store.cpp index 4e5ca8ef6..1d3d7222a 100644 --- a/src/in_mem_data_store.cpp +++ b/src/in_mem_data_store.cpp @@ -54,7 +54,7 @@ template location_t InMemDataStore::load_impl(AlignedF std::stringstream stream; stream << "ERROR: Driver requests loading " << this->_dim << " dimension," << "but file has " << file_dim << " dimension." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; aligned_free(_data); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -76,7 +76,7 @@ template location_t InMemDataStore::load_impl(const st { std::stringstream stream; stream << "ERROR: data file " << filename << " does not exist." 
<< std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; aligned_free(_data); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -87,7 +87,7 @@ template location_t InMemDataStore::load_impl(const st std::stringstream stream; stream << "ERROR: Driver requests loading " << this->_dim << " dimension," << "but file has " << file_dim << " dimension." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; aligned_free(_data); throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } diff --git a/src/index.cpp b/src/index.cpp index 55ba60ac9..6e7a91b86 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -112,7 +112,7 @@ Index::Index(Metric m, const size_t dim, const size_t max_point // This is safe because T is float inside the if block. this->_distance.reset((Distance *)new AVXNormalizedCosineDistanceFloat()); this->_normalize_vecs = true; - diskann::cout << "Normalizing vectors and using L2 for cosine " + std::cout << "Normalizing vectors and using L2 for cosine " "AVXNormalizedCosineDistanceFloat()." << std::endl; } @@ -213,7 +213,7 @@ template size_t Index::save(const char *filename, bool compact_before_save } else { - diskann::cout << "Save index in a single file currently not supported. " + std::cout << "Save index in a single file currently not supported. " "Not saving the index." << std::endl; } @@ -403,7 +403,7 @@ void Index::save(const char *filename, bool compact_before_save // _max_points. reposition_frozen_point_to_end(); - diskann::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." << std::endl; + std::cout << "Time taken for save: " << timer.elapsed() / 1000000.0 << "s." 
<< std::endl; } #ifdef EXEC_ENV_OLS @@ -416,14 +416,14 @@ size_t Index::load_tags(const std::string tag_filename) { if (_enable_tags && !file_exists(tag_filename)) { - diskann::cerr << "Tag file " << tag_filename << " does not exist!" << std::endl; + std::cerr << "Tag file " << tag_filename << " does not exist!" << std::endl; throw diskann::ANNException("Tag file " + tag_filename + " does not exist!", -1, __FUNCSIG__, __FILE__, __LINE__); } #endif if (!_enable_tags) { - diskann::cout << "Tags not loaded as tags not enabled." << std::endl; + std::cout << "Tags not loaded as tags not enabled." << std::endl; return 0; } @@ -440,7 +440,7 @@ size_t Index::load_tags(const std::string tag_filename) std::stringstream stream; stream << "ERROR: Found " << file_dim << " dimensions for tags," << "but tag file must have 1 dimension." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; delete[] tag_data; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -457,7 +457,7 @@ size_t Index::load_tags(const std::string tag_filename) _tag_to_location[tag] = i; } } - diskann::cout << "Tags loaded." << std::endl; + std::cout << "Tags loaded." << std::endl; delete[] tag_data; return file_num_points; } @@ -478,7 +478,7 @@ size_t Index::load_data(std::string filename) { std::stringstream stream; stream << "ERROR: data file " << filename << " does not exist." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } diskann::get_bin_metadata(filename, file_num_points, file_dim); @@ -492,7 +492,7 @@ size_t Index::load_data(std::string filename) std::stringstream stream; stream << "ERROR: Driver requests loading " << _dim << " dimension," << "but file has " << file_dim << " dimension." 
<< std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -583,7 +583,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui } else { - diskann::cout << "Single index file saving/loading support not yet " + std::cout << "Single index file saving/loading support not yet " "enabled. Not loading the index." << std::endl; return; @@ -595,7 +595,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui stream << "ERROR: When loading index, loaded " << data_file_num_pts << " points from datafile, " << graph_num_pts << " from graph, and " << tags_file_num_pts << " tags, with num_frozen_pts being set to " << _num_frozen_pts << " in constructor." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } #ifndef EXEC_ENV_OLS @@ -654,7 +654,7 @@ void Index::load(const char *filename, uint32_t num_threads, ui } reposition_frozen_point_to_end(); - diskann::cout << "Num frozen points:" << _num_frozen_pts << " _nd: " << _nd << " _start: " << _start + std::cout << "Num frozen points:" << _num_frozen_pts << " _nd: " << _nd << " _start: " << _start << " size(_location_to_tag): " << _location_to_tag.size() << " size(_tag_to_location):" << _tag_to_location.size() << " Max points: " << _max_points << std::endl; @@ -727,7 +727,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ size_t vamana_metadata_size = sizeof(size_t) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(size_t); #endif - diskann::cout << "From graph header, expected_file_size: " << expected_file_size + std::cout << "From graph header, expected_file_size: " << expected_file_size << ", _max_observed_degree: " << _max_observed_degree << ", _start: " << _start << ", file_frozen_pts: " << file_frozen_pts << std::endl; @@ -746,14 
+746,14 @@ size_t Index::load_graph(std::string filename, size_t expected_ "constructor asks for dynamic index. Exitting." << std::endl; } - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } #ifdef EXEC_ENV_OLS - diskann::cout << "Loading vamana graph from reader..." << std::flush; + std::cout << "Loading vamana graph from reader..." << std::flush; #else - diskann::cout << "Loading vamana graph " << filename << "..." << std::flush; + std::cout << "Loading vamana graph " << filename << "..." << std::flush; #endif const size_t expected_max_points = expected_num_points - file_frozen_pts; @@ -762,7 +762,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ // resize the _final_graph to the larger size. if (_max_points < expected_max_points) { - diskann::cout << "Number of points in data: " << expected_max_points + std::cout << "Number of points in data: " << expected_max_points << " is greater than max_points: " << _max_points << " Setting max points to: " << expected_max_points << std::endl; _final_graph.resize(expected_max_points + _num_frozen_pts); @@ -786,7 +786,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ nodes_read++; if (nodes_read % 1000000 == 0) { - diskann::cout << "." << std::flush; + std::cout << "." << std::flush; } if (k > _max_range_of_loaded_graph) { @@ -804,7 +804,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ if (k == 0) { - diskann::cerr << "ERROR: Point found with no out-neighbors, point#" << nodes_read << std::endl; + std::cerr << "ERROR: Point found with no out-neighbors, point#" << nodes_read << std::endl; } cc += k; @@ -815,7 +815,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ _final_graph[nodes_read - 1].swap(tmp); bytes_read += sizeof(uint32_t) * ((size_t)k + 1); if (nodes_read % 10000000 == 0) - diskann::cout << "." 
<< std::flush; + std::cout << "." << std::flush; if (k > _max_range_of_loaded_graph) { _max_range_of_loaded_graph = k; @@ -823,7 +823,7 @@ size_t Index::load_graph(std::string filename, size_t expected_ } #endif - diskann::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " + std::cout << "done. Index has " << nodes_read << " nodes and " << cc << " out-edges, _start is set to " << _start << std::endl; return nodes_read; } @@ -852,7 +852,7 @@ template int Index std::shared_lock lock(_tag_lock); if (_tag_to_location.find(tag) == _tag_to_location.end()) { - diskann::cout << "Tag " << tag << " does not exist" << std::endl; + std::cout << "Tag " << tag << " does not exist" << std::endl; return -1; } @@ -1015,7 +1015,7 @@ std::pair Index::iterate_to_fixed_point( { if (id >= _max_points + _num_frozen_pts) { - diskann::cerr << "Out of range loc found as an edge : " << id << std::endl; + std::cerr << "Out of range loc found as an edge : " << id << std::endl; throw diskann::ANNException(std::string("Wrong loc") + std::to_string(id), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -1482,14 +1482,14 @@ void Index::link(const IndexWriteParameters ¶meters) if (node_ctr % 100000 == 0) { - diskann::cout << "\r" << (100.0 * node_ctr) / (visit_order.size()) << "% of index build completed." + std::cout << "\r" << (100.0 * node_ctr) / (visit_order.size()) << "% of index build completed." << std::flush; } } if (_nd > 0) { - diskann::cout << "Starting final cleanup.." << std::flush; + std::cout << "Starting final cleanup.." << std::flush; } #pragma omp parallel for schedule(dynamic, 2048) for (int64_t node_ctr = 0; node_ctr < (int64_t)(visit_order.size()); node_ctr++) @@ -1522,7 +1522,7 @@ void Index::link(const IndexWriteParameters ¶meters) } if (_nd > 0) { - diskann::cout << "done. Link time: " << ((double)link_timer.elapsed() / (double)1000000) << "s" << std::endl; + std::cout << "done. 
Link time: " << ((double)link_timer.elapsed() / (double)1000000) << "s" << std::endl; } } @@ -1568,7 +1568,7 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons } } - diskann::cout << "Prune time : " << timer.elapsed() / 1000 << "ms" << std::endl; + std::cout << "Prune time : " << timer.elapsed() / 1000 << "ms" << std::endl; size_t max = 0, min = 1 << 30, total = 0, cnt = 0; for (size_t i = 0; i < _max_points + _num_frozen_pts; i++) { @@ -1586,7 +1586,7 @@ void Index::prune_all_neighbors(const uint32_t max_degree, cons min = max; if (_nd > 0) { - diskann::cout << "Index built with degree: max:" << max + std::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) << " min:" << min << " count(deg<2):" << cnt << std::endl; } @@ -1611,7 +1611,7 @@ void Index::set_start_points(const T *data, size_t data_count) _data_store->set_vector((location_t)(i + _max_points), data + i * _dim); } _has_built = true; - diskann::cout << "Index start points set: #" << _num_frozen_pts << std::endl; + std::cout << "Index start points set: #" << _num_frozen_pts << std::endl; } template @@ -1665,7 +1665,7 @@ template void Index::build_with_data_populated(const IndexWriteParameters ¶meters, const std::vector &tags) { - diskann::cout << "Starting index build with " << _nd << " points... " << std::endl; + std::cout << "Starting index build with " << _nd << " points... " << std::endl; if (_nd < 1) throw ANNException("Error: Trying to build an index with 0 points", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -1675,7 +1675,7 @@ void Index::build_with_data_populated(const IndexWriteParameter std::stringstream stream; stream << "ERROR: Driver requests loading " << _nd << " points from file," << "but tags vector is of size " << tags.size() << "." 
<< std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } if (_enable_tags) @@ -1711,7 +1711,7 @@ void Index::build_with_data_populated(const IndexWriteParameter if (pool.size() < 2) cnt++; } - diskann::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) + std::cout << "Index built with degree: max:" << max << " avg:" << (float)total / (float)(_nd + _num_frozen_pts) << " min:" << min << " count(deg<2):" << cnt << std::endl; _max_observed_degree = std::max((uint32_t)max, _max_observed_degree); @@ -1787,7 +1787,7 @@ void Index::build(const char *filename, const size_t num_points { std::stringstream stream; stream << "ERROR: Data file " << filename << " does not exist." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -1826,7 +1826,7 @@ void Index::build(const char *filename, const size_t num_points std::stringstream stream; stream << "ERROR: Driver requests loading " << _dim << " dimension," << "but file has " << file_dim << " dimension." << std::endl; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; if (_pq_dist) aligned_free(_pq_data); @@ -1856,7 +1856,7 @@ void Index::build(const char *filename, const size_t num_points } _data_store->populate_data(filename, 0U); - diskann::cout << "Using only first " << num_points_to_load << " from file.. " << std::endl; + std::cout << "Using only first " << num_points_to_load << " from file.. 
" << std::endl; { std::unique_lock tl(_tag_lock); @@ -1882,7 +1882,7 @@ void Index::build(const char *filename, const size_t num_points { if (file_exists(tag_filename)) { - diskann::cout << "Loading tags from " << tag_filename << " for vamana index build" << std::endl; + std::cout << "Loading tags from " << tag_filename << " for vamana index build" << std::endl; TagT *tag_data = nullptr; size_t npts, ndim; diskann::load_bin(tag_filename, tag_data, npts, ndim); @@ -1974,7 +1974,7 @@ LabelT Index::get_converted_label(const std::string &raw_label) } std::stringstream stream; stream << "Unable to find label in the Label Map"; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -2018,7 +2018,7 @@ void Index::parse_label_file(const std::string &label_file, siz } if (lbls.size() <= 0) { - diskann::cout << "No label found"; + std::cout << "No label found"; exit(-1); } std::sort(lbls.begin(), lbls.end()); @@ -2026,7 +2026,7 @@ void Index::parse_label_file(const std::string &label_file, siz line_cnt++; } num_points = (size_t)line_cnt; - diskann::cout << "Identified " << _labels.size() << " distinct label(s)" << std::endl; + std::cout << "Identified " << _labels.size() << " distinct label(s)" << std::endl; } template @@ -2153,10 +2153,10 @@ std::pair Index::search(const T *query, con if (L > scratch->get_L()) { - diskann::cout << "Attempting to expand query scratch_space. Was created " + std::cout << "Attempting to expand query scratch_space. Was created " << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); - diskann::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; + std::cout << "Resize completed. 
New scratch->L is " << scratch->get_L() << std::endl; } const std::vector unused_filter_label; @@ -2195,7 +2195,7 @@ std::pair Index::search(const T *query, con } if (pos < K) { - diskann::cerr << "Found pos: " << pos << "fewer than K elements " << K << " for query" << std::endl; + std::cerr << "Found pos: " << pos << "fewer than K elements " << K << " for query" << std::endl; } return retval; @@ -2240,10 +2240,10 @@ std::pair Index::search_with_filters(const if (L > scratch->get_L()) { - diskann::cout << "Attempting to expand query scratch_space. Was created " + std::cout << "Attempting to expand query scratch_space. Was created " << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); - diskann::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; + std::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; } std::vector filter_vec; @@ -2257,7 +2257,7 @@ std::pair Index::search_with_filters(const } else { - diskann::cout << "No filtered medoid found. exitting " + std::cout << "No filtered medoid found. exitting " << std::endl; // RKNOTE: If universal label found start there throw diskann::ANNException("No filtered medoid found. exitting ", -1); } @@ -2296,7 +2296,7 @@ std::pair Index::search_with_filters(const } if (pos < K) { - diskann::cerr << "Found fewer than K elements for query" << std::endl; + std::cerr << "Found fewer than K elements for query" << std::endl; } return retval; @@ -2334,10 +2334,10 @@ size_t Index::search_with_tags(const T *query, const uint64_t K if (L > scratch->get_L()) { - diskann::cout << "Attempting to expand query scratch_space. Was created " + std::cout << "Attempting to expand query scratch_space. Was created " << "with Lsize: " << scratch->get_L() << " but search L is: " << L << std::endl; scratch->resize_for_new_L(L); - diskann::cout << "Resize completed. 
New scratch->L is " << scratch->get_L() << std::endl; + std::cout << "Resize completed. New scratch->L is " << scratch->get_L() << std::endl; } std::shared_lock ul(_update_lock); @@ -2434,7 +2434,7 @@ template int Index if (!_enable_tags) { - diskann::cerr << "Tags must be instantiated for deletions" << std::endl; + std::cerr << "Tags must be instantiated for deletions" << std::endl; return -2; } @@ -2539,13 +2539,13 @@ consolidation_report Index::consolidate_deletes(const IndexWrit if (_empty_slots.size() + _nd != _max_points) { std::string err = "#empty slots + nd != max points"; - diskann::cerr << err << std::endl; + std::cerr << err << std::endl; throw ANNException(err, -1, __FUNCSIG__, __FILE__, __LINE__); } if (_location_to_tag.size() + _delete_set->size() != _nd) { - diskann::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size (" + std::cerr << "Error: _location_to_tag.size (" << _location_to_tag.size() << ") + _delete_set->size (" << _delete_set->size() << ") != _nd(" << _nd << ") "; return consolidation_report(diskann::consolidation_report::status_code::INCONSISTENT_COUNT_ERROR, 0, 0, 0, 0, 0, 0, 0); @@ -2565,11 +2565,11 @@ consolidation_report Index::consolidate_deletes(const IndexWrit std::unique_lock cl(_consolidate_lock, std::defer_lock); if (!cl.try_lock()) { - diskann::cerr << "Consildate delete function failed to acquire consolidate lock" << std::endl; + std::cerr << "Consildate delete function failed to acquire consolidate lock" << std::endl; return consolidation_report(diskann::consolidation_report::status_code::LOCK_FAIL, 0, 0, 0, 0, 0, 0, 0); } - diskann::cout << "Starting consolidate_deletes... "; + std::cout << "Starting consolidate_deletes... "; std::unique_ptr> old_delete_set(new tsl::robin_set); { @@ -2623,7 +2623,7 @@ consolidation_report Index::consolidate_deletes(const IndexWrit } double duration = timer.elapsed() / 1000000.0; - diskann::cout << " done in " << duration << " seconds." 
<< std::endl; + std::cout << " done in " << duration << " seconds." << std::endl; return consolidation_report(diskann::consolidation_report::status_code::SUCCESS, ret_nd, max_points, empty_slots_size, old_delete_set_size, delete_set_size, num_calls_to_process_delete, duration); @@ -2646,7 +2646,7 @@ template void Index void Index void Index void Index void Index void Index(stop - start).count() << "s" << std::endl; + std::cout << "Resizing took: " << std::chrono::duration(stop - start).count() << "s" << std::endl; } template @@ -3089,7 +3089,7 @@ template int Index if (_tag_to_location.find(tag) == _tag_to_location.end()) { - diskann::cerr << "Delete tag not found " << tag << std::endl; + std::cerr << "Delete tag not found " << tag << std::endl; return -1; } assert(_tag_to_location[tag] < _max_points); @@ -3170,14 +3170,14 @@ template void Index tl(_tag_lock); std::shared_lock dl(_delete_lock); - diskann::cout << "------------------- Index object: " << (uint64_t)this << " -------------------" << std::endl; - diskann::cout << "Number of points: " << _nd << std::endl; - diskann::cout << "Graph size: " << _final_graph.size() << std::endl; - diskann::cout << "Location to tag size: " << _location_to_tag.size() << std::endl; - diskann::cout << "Tag to location size: " << _tag_to_location.size() << std::endl; - diskann::cout << "Number of empty slots: " << _empty_slots.size() << std::endl; - diskann::cout << std::boolalpha << "Data compacted: " << this->_data_compacted << std::endl; - diskann::cout << "---------------------------------------------------------" + std::cout << "------------------- Index object: " << (uint64_t)this << " -------------------" << std::endl; + std::cout << "Number of points: " << _nd << std::endl; + std::cout << "Graph size: " << _final_graph.size() << std::endl; + std::cout << "Location to tag size: " << _location_to_tag.size() << std::endl; + std::cout << "Tag to location size: " << _tag_to_location.size() << std::endl; + std::cout << "Number 
of empty slots: " << _empty_slots.size() << std::endl; + std::cout << std::boolalpha << "Data compacted: " << this->_data_compacted << std::endl; + std::cout << "---------------------------------------------------------" "------------" << std::endl; } @@ -3205,7 +3205,7 @@ template void Index &read_reqs, io_contex { if (async == true) { - diskann::cout << "Async currently not supported in linux." << std::endl; + std::cout << "Async currently not supported in linux." << std::endl; } assert(this->file_desc != -1); execute_io(ctx, this->file_desc, read_reqs); diff --git a/src/logger.cpp b/src/logger.cpp deleted file mode 100644 index dc27f718d..000000000 --- a/src/logger.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT license. - -#include -#include - -#include "logger_impl.h" -#include "windows_customizations.h" - -namespace diskann -{ - -DISKANN_DLLEXPORT ANNStreamBuf coutBuff(stdout); -DISKANN_DLLEXPORT ANNStreamBuf cerrBuff(stderr); - -DISKANN_DLLEXPORT std::basic_ostream cout(&coutBuff); -DISKANN_DLLEXPORT std::basic_ostream cerr(&cerrBuff); - -#ifdef ENABLE_CUSTOM_LOGGER -std::function g_logger; - -void SetCustomLogger(std::function logger) -{ - g_logger = logger; - diskann::cout << "Set Custom Logger" << std::endl; -} -#endif - -ANNStreamBuf::ANNStreamBuf(FILE *fp) -{ - if (fp == nullptr) - { - throw diskann::ANNException("File pointer passed to ANNStreamBuf() cannot be null", -1); - } - if (fp != stdout && fp != stderr) - { - throw diskann::ANNException("The custom logger only supports stdout and stderr.", -1); - } - _fp = fp; - _logLevel = (_fp == stdout) ? 
LogLevel::LL_Info : LogLevel::LL_Error; -#ifdef ENABLE_CUSTOM_LOGGER - _buf = new char[BUFFER_SIZE + 1]; // See comment in the header -#else - _buf = new char[BUFFER_SIZE]; // See comment in the header -#endif - - std::memset(_buf, 0, (BUFFER_SIZE) * sizeof(char)); - setp(_buf, _buf + BUFFER_SIZE - 1); -} - -ANNStreamBuf::~ANNStreamBuf() -{ - sync(); - _fp = nullptr; // we'll not close because we can't. - delete[] _buf; -} - -int ANNStreamBuf::overflow(int c) -{ - std::lock_guard lock(_mutex); - if (c != EOF) - { - *pptr() = (char)c; - pbump(1); - } - flush(); - return c; -} - -int ANNStreamBuf::sync() -{ - std::lock_guard lock(_mutex); - flush(); - return 0; -} - -int ANNStreamBuf::underflow() -{ - throw diskann::ANNException("Attempt to read on streambuf meant only for writing.", -1); -} - -int ANNStreamBuf::flush() -{ - const int num = (int)(pptr() - pbase()); - logImpl(pbase(), num); - pbump(-num); - return num; -} -void ANNStreamBuf::logImpl(char *str, int num) -{ -#ifdef ENABLE_CUSTOM_LOGGER - str[num] = '\0'; // Safe. See the c'tor. - // Invoke the OLS custom logging function. - if (g_logger) - { - g_logger(_logLevel, str); - } -#else - fwrite(str, sizeof(char), num, _fp); - fflush(_fp); -#endif -} - -} // namespace diskann diff --git a/src/math_utils.cpp b/src/math_utils.cpp index 7481da848..b7b2e53aa 100644 --- a/src/math_utils.cpp +++ b/src/math_utils.cpp @@ -5,7 +5,6 @@ #include #include #include -#include "logger.h" #include "utils.h" namespace math_utils @@ -40,15 +39,15 @@ void rotate_data_randomly(float *data, size_t num_points, size_t dim, float *rot CBLAS_TRANSPOSE transpose = CblasNoTrans; if (transpose_rot) { - diskann::cout << "Transposing rotation matrix.." << std::flush; + std::cout << "Transposing rotation matrix.." << std::flush; transpose = CblasTrans; } - diskann::cout << "done Rotating data with random matrix.." << std::flush; + std::cout << "done Rotating data with random matrix.." 
<< std::flush; cblas_sgemm(CblasRowMajor, CblasNoTrans, transpose, (MKL_INT)num_points, (MKL_INT)dim, (MKL_INT)dim, 1.0, data, (MKL_INT)dim, rot_mat, (MKL_INT)dim, 0, new_mat, (MKL_INT)dim); - diskann::cout << "done." << std::endl; + std::cout << "done." << std::endl; } // calculate k closest centers to data of num_points * dim (row major) @@ -68,7 +67,7 @@ void compute_closest_centers_in_block(const float *const data, const size_t num_ { if (k > num_centers) { - diskann::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; + std::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; return; } @@ -150,7 +149,7 @@ void compute_closest_centers(float *data, size_t num_points, size_t dim, float * { if (k > num_centers) { - diskann::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; + std::cout << "ERROR: k (" << k << ") > num_center(" << num_centers << ")" << std::endl; return; } @@ -209,7 +208,7 @@ void compute_closest_centers(float *data, size_t num_points, size_t dim, float * void process_residuals(float *data_load, size_t num_points, size_t dim, float *cur_pivot_data, size_t num_centers, uint32_t *closest_centers, bool to_subtract) { - diskann::cout << "Processing residuals of " << num_points << " points in " << dim << " dimensions using " + std::cout << "Processing residuals of " << num_points << " points in " << dim << " dimensions using " << num_centers << " centers " << std::endl; #pragma omp parallel for schedule(static, 8192) for (int64_t n_iter = 0; n_iter < (int64_t)num_points; n_iter++) @@ -340,7 +339,7 @@ float run_lloyds(float *data, size_t num_points, size_t dim, float *centers, con if (((i != 0) && ((old_residual - residual) / residual) < 0.00001) || (residual < std::numeric_limits::epsilon())) { - diskann::cout << "Residuals unchanged: " << old_residual << " becomes " << residual + std::cout << "Residuals unchanged: " << old_residual << " becomes " << residual 
<< ". Early termination." << std::endl; break; } @@ -381,7 +380,7 @@ void kmeanspp_selecting_pivots(float *data, size_t num_points, size_t dim, float { if (num_points > 1 << 23) { - diskann::cout << "ERROR: n_pts " << num_points + std::cout << "ERROR: n_pts " << num_points << " currently not supported for k-means++, maximum is " "8388608. Falling back to random pivot " "selection." diff --git a/src/memory_mapper.cpp b/src/memory_mapper.cpp index d1c5ef984..149ec6c04 100644 --- a/src/memory_mapper.cpp +++ b/src/memory_mapper.cpp @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. -#include "logger.h" #include "memory_mapper.h" #include #include @@ -28,7 +27,7 @@ MemoryMapper::MemoryMapper(const char *filename) return; } _fileSize = sb.st_size; - diskann::cout << "File Size: " << _fileSize << std::endl; + std::cout << "File Size: " << _fileSize << std::endl; _buf = (char *)mmap(NULL, _fileSize, PROT_READ, MAP_PRIVATE, _fd, 0); #else _bareFile = @@ -63,7 +62,7 @@ MemoryMapper::MemoryMapper(const char *filename) if (TRUE == GetFileSizeEx(_bareFile, &fSize)) { _fileSize = fSize.QuadPart; // take the 64-bit value - diskann::cout << "File Size: " << _fileSize << std::endl; + std::cout << "File Size: " << _fileSize << std::endl; } else { diff --git a/src/partition.cpp b/src/partition.cpp index 2d46f9faf..8afd784c1 100644 --- a/src/partition.cpp +++ b/src/partition.cpp @@ -50,7 +50,7 @@ void gen_random_slice(const std::string base_file, const std::string output_pref base_reader.read((char *)&npts_u32, sizeof(uint32_t)); base_reader.read((char *)&nd_u32, sizeof(uint32_t)); - diskann::cout << "Loading base " << base_file << ". #points: " << npts_u32 << ". #dim: " << nd_u32 << "." + std::cout << "Loading base " << base_file << ". #points: " << npts_u32 << ". #dim: " << nd_u32 << "." 
<< std::endl; sample_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); sample_writer.write((char *)&nd_u32, sizeof(uint32_t)); @@ -79,7 +79,7 @@ void gen_random_slice(const std::string base_file, const std::string output_pref sample_id_writer.write((char *)&num_sampled_pts_u32, sizeof(uint32_t)); sample_writer.close(); sample_id_writer.close(); - diskann::cout << "Wrote " << num_sampled_pts_u32 << " points to sample file: " << output_prefix + "_data.bin" + std::cout << "Wrote " << num_sampled_pts_u32 << " points to sample file: " << output_prefix + "_data.bin" << std::endl; } @@ -219,14 +219,14 @@ int estimate_cluster_sizes(float *test_data_float, size_t num_test, float *pivot } } - diskann::cout << "Estimated cluster sizes: "; + std::cout << "Estimated cluster sizes: "; for (size_t i = 0; i < num_centers; i++) { uint32_t cur_shard_count = (uint32_t)shard_counts[i]; cluster_sizes.push_back((size_t)cur_shard_count); - diskann::cout << cur_shard_count << " "; + std::cout << cur_shard_count << " "; } - diskann::cout << std::endl; + std::cout << std::endl; delete[] shard_counts; delete[] block_closest_centers; return 0; @@ -247,7 +247,7 @@ int shard_data_into_clusters(const std::string data_file, float *pivots, const s size_t num_points = npts32; if (basedim32 != dim) { - diskann::cout << "Error. dimensions dont match for train set and base set" << std::endl; + std::cout << "Error. 
dimensions dont match for train set and base set" << std::endl; return -1; } @@ -303,12 +303,12 @@ int shard_data_into_clusters(const std::string data_file, float *pivots, const s } size_t total_count = 0; - diskann::cout << "Actual shard sizes: " << std::flush; + std::cout << "Actual shard sizes: " << std::flush; for (size_t i = 0; i < num_centers; i++) { uint32_t cur_shard_count = (uint32_t)shard_counts[i]; total_count += cur_shard_count; - diskann::cout << cur_shard_count << " "; + std::cout << cur_shard_count << " "; shard_data_writer[i].seekp(0); shard_data_writer[i].write((char *)&cur_shard_count, sizeof(uint32_t)); shard_data_writer[i].close(); @@ -317,7 +317,7 @@ int shard_data_into_clusters(const std::string data_file, float *pivots, const s shard_idmap_writer[i].close(); } - diskann::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " + std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " << total_count << " points across " << num_centers << " shards " << std::endl; return 0; } @@ -339,7 +339,7 @@ int shard_data_into_clusters_only_ids(const std::string data_file, float *pivots size_t num_points = npts32; if (basedim32 != dim) { - diskann::cout << "Error. dimensions dont match for train set and base set" << std::endl; + std::cout << "Error. 
dimensions dont match for train set and base set" << std::endl; return -1; } @@ -390,18 +390,18 @@ int shard_data_into_clusters_only_ids(const std::string data_file, float *pivots } size_t total_count = 0; - diskann::cout << "Actual shard sizes: " << std::flush; + std::cout << "Actual shard sizes: " << std::flush; for (size_t i = 0; i < num_centers; i++) { uint32_t cur_shard_count = (uint32_t)shard_counts[i]; total_count += cur_shard_count; - diskann::cout << cur_shard_count << " "; + std::cout << cur_shard_count << " "; shard_idmap_writer[i].seekp(0); shard_idmap_writer[i].write((char *)&cur_shard_count, sizeof(uint32_t)); shard_idmap_writer[i].close(); } - diskann::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " + std::cout << "\n Partitioned " << num_points << " with replication factor " << k_base << " to get " << total_count << " points across " << num_centers << " shards " << std::endl; return 0; } @@ -463,7 +463,7 @@ int retrieve_shard_data_from_ids(const std::string data_file, std::string idmap_ break; } - diskann::cout << "Written file with " << num_written << " points" << std::endl; + std::cout << "Written file with " << num_written << " points" << std::endl; shard_data_writer.seekp(0); shard_data_writer.write((char *)&num_written, sizeof(uint32_t)); @@ -502,12 +502,12 @@ int partition(const std::string data_file, const float sampling_rate, size_t num pivot_data = new float[num_parts * train_dim]; // Process Global k-means for kmeans_partitioning Step - diskann::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; + std::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; kmeans::kmeanspp_selecting_pivots(train_data_float, num_train, train_dim, pivot_data, num_parts); kmeans::run_lloyds(train_data_float, num_train, train_dim, pivot_data, num_parts, max_k_means_reps, NULL, NULL); - diskann::cout << "Saving global k-center pivots" << std::endl; + std::cout << 
"Saving global k-center pivots" << std::endl; diskann::save_bin(output_file.c_str(), pivot_data, (size_t)num_parts, train_dim); // now pivots are ready. need to stream base points and assign them to @@ -559,7 +559,7 @@ int partition_with_ram_budget(const std::string data_file, const double sampling pivot_data = new float[num_parts * train_dim]; // Process Global k-means for kmeans_partitioning Step - diskann::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; + std::cout << "Processing global k-means (kmeans_partitioning Step)" << std::endl; kmeans::kmeanspp_selecting_pivots(train_data_float, num_train, train_dim, pivot_data, num_parts); kmeans::run_lloyds(train_data_float, num_train, train_dim, pivot_data, num_parts, max_k_means_reps, NULL, NULL); @@ -581,7 +581,7 @@ int partition_with_ram_budget(const std::string data_file, const double sampling if (cur_shard_ram_estimate > max_ram_usage) max_ram_usage = cur_shard_ram_estimate; } - diskann::cout << "With " << num_parts + std::cout << "With " << num_parts << " parts, max estimated RAM usage: " << max_ram_usage / (1024 * 1024 * 1024) << "GB, budget given is " << ram_budget << std::endl; if (max_ram_usage > 1024 * 1024 * 1024 * ram_budget) @@ -591,7 +591,7 @@ int partition_with_ram_budget(const std::string data_file, const double sampling } } - diskann::cout << "Saving global k-center pivots" << std::endl; + std::cout << "Saving global k-center pivots" << std::endl; diskann::save_bin(output_file.c_str(), pivot_data, (size_t)num_parts, train_dim); shard_data_into_clusters_only_ids(data_file, pivot_data, num_parts, train_dim, k_base, prefix_path); diff --git a/src/pq.cpp b/src/pq.cpp index 86c68ce0a..e08fe3a25 100644 --- a/src/pq.cpp +++ b/src/pq.cpp @@ -57,7 +57,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nr != 4 && nr != 5) { - diskann::cout << "Error reading pq_pivots file " << pq_table_file + std::cout << "Error reading pq_pivots file " << 
pq_table_file << ". Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting " << 4 << " or " << 5; throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, @@ -66,13 +66,13 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nr == 4) { - diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + std::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] << " " << file_offset_data[3] << std::endl; } else if (nr == 5) { use_old_filetype = true; - diskann::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] + std::cout << "Offsets: " << file_offset_data[0] << " " << file_offset_data[1] << " " << file_offset_data[2] << " " << file_offset_data[3] << file_offset_data[4] << std::endl; } else @@ -89,7 +89,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if ((nr != NUM_PQ_CENTROIDS)) { - diskann::cout << "Error reading pq_pivots file " << pq_table_file << ". file_num_centers = " << nr + std::cout << "Error reading pq_pivots file " << pq_table_file << ". file_num_centers = " << nr << " but expecting " << NUM_PQ_CENTROIDS << " centers"; throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -105,7 +105,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if ((nr != this->ndims) || (nc != 1)) { - diskann::cerr << "Error reading centroids from pq_pivots file " << pq_table_file << ". file_dim = " << nr + std::cerr << "Error reading centroids from pq_pivots file " << pq_table_file << ". 
file_dim = " << nr << ", file_cols = " << nc << " but expecting " << this->ndims << " entries in 1 dimension."; throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -124,13 +124,13 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n if (nc != 1 || (nr != num_chunks + 1 && num_chunks != 0)) { - diskann::cerr << "Error loading chunk offsets file. numc: " << nc << " (should be 1). numr: " << nr + std::cerr << "Error loading chunk offsets file. numc: " << nc << " (should be 1). numr: " << nr << " (should be " << num_chunks + 1 << " or 0 if we need to infer)" << std::endl; throw diskann::ANNException("Error loading chunk offsets file", -1, __FUNCSIG__, __FILE__, __LINE__); } this->n_chunks = nr - 1; - diskann::cout << "Loaded PQ Pivots: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->ndims + std::cout << "Loaded PQ Pivots: #ctrs: " << NUM_PQ_CENTROIDS << ", #dims: " << this->ndims << ", #chunks: " << this->n_chunks << std::endl; if (file_exists(rotmat_file)) @@ -142,7 +142,7 @@ void FixedChunkPQTable::load_pq_centroid_bin(const char *pq_table_file, size_t n #endif if (nr != this->ndims || nc != this->ndims) { - diskann::cerr << "Error loading rotation matrix file" << std::endl; + std::cerr << "Error loading rotation matrix file" << std::endl; throw diskann::ANNException("Error loading rotation matrix file", -1, __FUNCSIG__, __FILE__, __LINE__); } use_rotation = true; @@ -351,7 +351,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u { if (num_pq_chunks > dim) { - diskann::cout << " Error: number of chunks more than dimension" << std::endl; + std::cout << " Error: number of chunks more than dimension" << std::endl; return -1; } @@ -366,7 +366,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u diskann::load_bin(pq_pivots_path, full_pivot_data, file_num_centers, file_dim, METADATA_SIZE); if (file_dim == 
dim && file_num_centers == num_centers) { - diskann::cout << "PQ pivot file exists. Not generating again" << std::endl; + std::cout << "PQ pivot file exists. Not generating again" << std::endl; return -1; } } @@ -459,7 +459,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u std::unique_ptr cur_data = std::make_unique(num_train * cur_chunk_size); std::unique_ptr closest_center = std::make_unique(num_train); - diskann::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " + std::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " << chunk_offsets[i + 1] << ")" << std::endl; #pragma omp parallel for schedule(static, 65536) @@ -491,7 +491,7 @@ int generate_pq_pivots(const float *const passed_train_data, size_t num_train, u chunk_offsets.size(), 1, cumul_bytes[2]); diskann::save_bin(pq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); - diskann::cout << "Saved pq pivot data to " << pq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] + std::cout << "Saved pq pivot data to " << pq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] << "B." 
<< std::endl; return 0; @@ -502,7 +502,7 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32 { if (num_pq_chunks > dim) { - diskann::cout << " Error: number of chunks more than dimension" << std::endl; + std::cout << " Error: number of chunks more than dimension" << std::endl; return -1; } @@ -623,7 +623,7 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32 std::unique_ptr cur_data = std::make_unique(num_train * cur_chunk_size); std::unique_ptr closest_center = std::make_unique(num_train); - diskann::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " + std::cout << "Processing chunk " << i << " with dimensions [" << chunk_offsets[i] << ", " << chunk_offsets[i + 1] << ")" << std::endl; #pragma omp parallel for schedule(static, 65536) @@ -699,7 +699,7 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32 chunk_offsets.size(), 1, cumul_bytes[2]); diskann::save_bin(opq_pivots_path.c_str(), cumul_bytes.data(), cumul_bytes.size(), 1, 0); - diskann::cout << "Saved opq pivot data to " << opq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] + std::cout << "Saved opq pivot data to " << opq_pivots_path << " of size " << cumul_bytes[cumul_bytes.size() - 1] << "B." << std::endl; std::string rotmat_path = opq_pivots_path + "_rotation_matrix.bin"; @@ -748,7 +748,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if (nr != 4) { - diskann::cout << "Error reading pq_pivots file " << pq_pivots_path + std::cout << "Error reading pq_pivots file " << pq_pivots_path << ". 
Offsets dont contain correct metadata, # offsets = " << nr << ", but expecting 4."; throw diskann::ANNException("Error reading pq_pivots file at offsets data.", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -758,7 +758,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if ((nr != num_centers) || (nc != dim)) { - diskann::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_num_centers = " << nr + std::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_num_centers = " << nr << ", file_dim = " << nc << " but expecting " << num_centers << " centers in " << dim << " dimensions."; throw diskann::ANNException("Error reading pq_pivots file at pivots data.", -1, __FUNCSIG__, __FILE__, @@ -769,7 +769,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if ((nr != dim) || (nc != 1)) { - diskann::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_dim = " << nr + std::cout << "Error reading pq_pivots file " << pq_pivots_path << ". file_dim = " << nr << ", file_cols = " << nc << " but expecting " << dim << " entries in 1 dimension."; throw diskann::ANNException("Error reading pq_pivots file at centroid data.", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -779,7 +779,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent if (nr != (uint64_t)num_pq_chunks + 1 || nc != 1) { - diskann::cout << "Error reading pq_pivots file at chunk offsets; file has nr=" << nr << ",nc=" << nc + std::cout << "Error reading pq_pivots file at chunk offsets; file has nr=" << nr << ",nc=" << nc << ", expecting nr=" << num_pq_chunks + 1 << ", nc=1." 
<< std::endl; throw diskann::ANNException("Error reading pq_pivots file at chunk offsets.", -1, __FUNCSIG__, __FILE__, __LINE__); @@ -791,12 +791,12 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent diskann::load_bin(rotmat_path.c_str(), rotmat_tr, nr, nc); if (nr != (uint64_t)dim || nc != dim) { - diskann::cout << "Error reading rotation matrix file." << std::endl; + std::cout << "Error reading rotation matrix file." << std::endl; throw diskann::ANNException("Error reading rotation matrix file.", -1, __FUNCSIG__, __FILE__, __LINE__); } } - diskann::cout << "Loaded PQ pivot information" << std::endl; + std::cout << "Loaded PQ pivot information" << std::endl; } std::ofstream compressed_file_writer(pq_compressed_vectors_path, std::ios::binary); @@ -835,7 +835,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent base_reader.read((char *)(block_data_T.get()), sizeof(T) * (cur_blk_size * dim)); diskann::convert_types(block_data_T.get(), block_data_tmp.get(), cur_blk_size, dim); - diskann::cout << "Processing points [" << start_id << ", " << end_id << ").." << std::flush; + std::cout << "Processing points [" << start_id << ", " << end_id << ").." << std::flush; for (size_t p = 0; p < cur_blk_size; p++) { @@ -917,7 +917,7 @@ int generate_pq_data_from_pivots(const std::string &data_file, uint32_t num_cent #ifdef SAVE_INFLATED_PQ inflated_file_writer.write((char *)(block_inflated_base.get()), cur_blk_size * dim * sizeof(float)); #endif - diskann::cout << ".done." << std::endl; + std::cout << ".done." << std::endl; } // Gopal. Splitting diskann_dll into separate DLLs for search and build. // This code should only be available in the "build" DLL. 
@@ -941,7 +941,7 @@ void generate_disk_quantized_data(const std::string &data_file_to_use, const std // instantiates train_data with random sample updates train_size gen_random_slice(data_file_to_use.c_str(), p_val, train_data, train_size, train_dim); - diskann::cout << "Training data with " << train_size << " samples loaded." << std::endl; + std::cout << "Training data with " << train_size << " samples loaded." << std::endl; if (disk_pq_dims > train_dim) disk_pq_dims = train_dim; @@ -971,7 +971,7 @@ void generate_quantized_data(const std::string &data_file_to_use, const std::str { // instantiates train_data with random sample updates train_size gen_random_slice(data_file_to_use.c_str(), p_val, train_data, train_size, train_dim); - diskann::cout << "Training data with " << train_size << " samples loaded." << std::endl; + std::cout << "Training data with " << train_size << " samples loaded." << std::endl; bool make_zero_mean = true; if (compareMetric == diskann::Metric::INNER_PRODUCT) @@ -993,7 +993,7 @@ void generate_quantized_data(const std::string &data_file_to_use, const std::str } else { - diskann::cout << "Skip Training with predefined pivots in: " << pq_pivots_path << std::endl; + std::cout << "Skip Training with predefined pivots in: " << pq_pivots_path << std::endl; } generate_pq_data_from_pivots(data_file_to_use, NUM_PQ_CENTROIDS, (uint32_t)num_pq_chunks, pq_pivots_path, pq_compressed_vectors_path, use_opq); diff --git a/src/pq_flash_index.cpp b/src/pq_flash_index.cpp index 78e44ba70..55546ca4a 100644 --- a/src/pq_flash_index.cpp +++ b/src/pq_flash_index.cpp @@ -47,14 +47,14 @@ PQFlashIndex::PQFlashIndex(std::shared_ptr &fileRe { if (std::is_floating_point::value) { - diskann::cout << "Cosine metric chosen for (normalized) float data." + std::cout << "Cosine metric chosen for (normalized) float data." "Changing distance to L2 to boost accuracy." 
<< std::endl; metric = diskann::Metric::L2; } else { - diskann::cerr << "WARNING: Cannot normalize integral data types." + std::cerr << "WARNING: Cannot normalize integral data types." << " This may result in erroneous results or poor recall." << " Consider using L2 distance with integral data types." << std::endl; } @@ -84,7 +84,7 @@ template PQFlashIndex::~PQFlashIndex() if (load_flag) { - diskann::cout << "Clearing scratch" << std::endl; + std::cout << "Clearing scratch" << std::endl; ScratchStoreManager> manager(this->thread_data); manager.destroy(); this->reader->deregister_all_threads(); @@ -104,7 +104,7 @@ template PQFlashIndex::~PQFlashIndex() template void PQFlashIndex::setup_thread_data(uint64_t nthreads, uint64_t visited_reserve) { - diskann::cout << "Setting up thread-specific contexts for nthreads: " << nthreads << std::endl; + std::cout << "Setting up thread-specific contexts for nthreads: " << nthreads << std::endl; // omp parallel for to generate unique thread IDs #pragma omp parallel for num_threads((int)nthreads) for (int64_t thread = 0; thread < (int64_t)nthreads; thread++) @@ -122,7 +122,7 @@ void PQFlashIndex::setup_thread_data(uint64_t nthreads, uint64_t visi template void PQFlashIndex::load_cache_list(std::vector &node_list) { - diskann::cout << "Loading the cache list into memory.." << std::flush; + std::cout << "Loading the cache list into memory.." << std::flush; size_t num_cached_nodes = node_list.size(); // borrow thread data @@ -191,7 +191,7 @@ template void PQFlashIndex::load_cache_ node_idx++; } } - diskann::cout << "..done." << std::endl; + std::cout << "..done." << std::endl; } #ifdef EXEC_ENV_OLS @@ -245,7 +245,7 @@ void PQFlashIndex::generate_cache_list_from_sample_queries(std::strin #endif else { - diskann::cerr << "Sample bin file not found. Not generating cache." << std::endl; + std::cerr << "Sample bin file not found. Not generating cache." 
<< std::endl; return; } @@ -301,11 +301,11 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: uint64_t tenp_nodes = (uint64_t)(std::round(this->num_points * 0.1)); if (num_nodes_to_cache > tenp_nodes) { - diskann::cout << "Reducing nodes to cache from: " << num_nodes_to_cache << " to: " << tenp_nodes + std::cout << "Reducing nodes to cache from: " << num_nodes_to_cache << " to: " << tenp_nodes << "(10 percent of total nodes:" << this->num_points << ")" << std::endl; num_nodes_to_cache = tenp_nodes == 0 ? 1 : tenp_nodes; } - diskann::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; + std::cout << "Caching " << num_nodes_to_cache << "..." << std::endl; // borrow thread data ScratchStoreManager> manager(this->thread_data); @@ -362,14 +362,14 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: else std::sort(nodes_to_expand.begin(), nodes_to_expand.end()); - diskann::cout << "Level: " << lvl << std::flush; + std::cout << "Level: " << lvl << std::flush; bool finish_flag = false; uint64_t BLOCK_SIZE = 1024; uint64_t nblocks = DIV_ROUND_UP(nodes_to_expand.size(), BLOCK_SIZE); for (size_t block = 0; block < nblocks && !finish_flag; block++) { - diskann::cout << "." << std::flush; + std::cout << "." << std::flush; size_t start = block * BLOCK_SIZE; size_t end = (std::min)((block + 1) * BLOCK_SIZE, nodes_to_expand.size()); std::vector read_reqs; @@ -422,7 +422,7 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: } } - diskann::cout << ". #nodes: " << node_set.size() - prev_node_set_size + std::cout << ". #nodes: " << node_set.size() - prev_node_set_size << ", #nodes thus far: " << node_set.size() << std::endl; prev_node_set_size = node_set.size(); lvl++; @@ -437,10 +437,10 @@ void PQFlashIndex::cache_bfs_levels(uint64_t num_nodes_to_cache, std: for (auto node : *cur_level) node_list.push_back(node); - diskann::cout << "Level: " << lvl << std::flush; - diskann::cout << ". 
#nodes: " << node_list.size() - prev_node_set_size << ", #nodes thus far: " << node_list.size() + std::cout << "Level: " << lvl << std::flush; + std::cout << ". #nodes: " << node_list.size() - prev_node_set_size << ", #nodes thus far: " << node_list.size() << std::endl; - diskann::cout << "done" << std::endl; + std::cout << "done" << std::endl; } template void PQFlashIndex::use_medoids_data_as_centroids() @@ -454,7 +454,7 @@ template void PQFlashIndex::use_medoids ScratchStoreManager> manager(this->thread_data); auto data = manager.scratch_space(); IOContext &ctx = data->ctx; - diskann::cout << "Loading centroid data from medoids vector data of " << num_medoids << " medoid(s)" << std::endl; + std::cout << "Loading centroid data from medoids vector data of " << num_medoids << " medoid(s)" << std::endl; for (uint64_t cur_m = 0; cur_m < num_medoids; cur_m++) { auto medoid = medoids[cur_m]; @@ -569,7 +569,7 @@ LabelT PQFlashIndex::get_converted_label(const std::string &filter_la } std::stringstream stream; stream << "Unable to find label in the Label Map"; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } @@ -594,7 +594,7 @@ void PQFlashIndex::get_label_file_metadata(std::string map_file, uint num_pts++; } - diskann::cout << "Labels file metadata: num_points: " << num_pts << ", #total_labels: " << num_total_labels + std::cout << "Labels file metadata: num_points: " << num_pts << ", #total_labels: " << num_total_labels << std::endl; infile.close(); } @@ -659,7 +659,7 @@ void PQFlashIndex::parse_label_file(const std::string &label_file, si int32_t filter_num = get_filter_number(token_as_num); if (filter_num == -1) { - diskann::cout << "Error!! " << std::endl; + std::cout << "Error!! 
" << std::endl; exit(-1); } _pts_to_labels[counter++] = filter_num; @@ -669,7 +669,7 @@ void PQFlashIndex::parse_label_file(const std::string &label_file, si if (num_lbls_in_cur_pt == 0) { - diskann::cout << "No label found for point " << line_cnt << std::endl; + std::cout << "No label found for point " << line_cnt << std::endl; exit(-1); } line_cnt++; @@ -683,7 +683,7 @@ template void PQFlashIndex::set_univers int32_t temp_filter_num = get_filter_number(label); if (temp_filter_num == -1) { - diskann::cout << "Error, could not find universal label." << std::endl; + std::cout << "Error, could not find universal label." << std::endl; } else { @@ -747,7 +747,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons if (pq_file_num_centroids != 256) { - diskann::cout << "Error. Number of PQ centroids is not 256. Exiting." << std::endl; + std::cout << "Error. Number of PQ centroids is not 256. Exiting." << std::endl; return -1; } @@ -845,7 +845,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons _real_to_dummy_map[real_id].emplace_back(dummy_id); } dummy_map_stream.close(); - diskann::cout << "Loaded dummy map" << std::endl; + std::cout << "Loaded dummy map" << std::endl; } } @@ -855,7 +855,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons pq_table.load_pq_centroid_bin(pq_table_bin.c_str(), nchunks_u64); #endif - diskann::cout << "Loaded PQ centroids and in-memory compressed vectors. #points: " << num_points + std::cout << "Loaded PQ centroids and in-memory compressed vectors. #points: " << num_points << " #dim: " << data_dim << " #aligned_dim: " << aligned_dim << " #chunks: " << n_chunks << std::endl; if (n_chunks > MAX_PQ_CHUNKS) @@ -883,7 +883,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons disk_pq_n_chunks = disk_pq_table.get_num_chunks(); disk_bytes_per_point = disk_pq_n_chunks * sizeof(uint8_t); // revising disk_bytes_per_point since DISK PQ is used. 
- diskann::cout << "Disk index uses PQ data compressed down to " << disk_pq_n_chunks << " bytes per point." + std::cout << "Disk index uses PQ data compressed down to " << disk_pq_n_chunks << " bytes per point." << std::endl; } @@ -917,7 +917,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons if (disk_nnodes != num_points) { - diskann::cout << "Mismatch in #points for compressed data file and disk " + std::cout << "Mismatch in #points for compressed data file and disk " "index file: " << disk_nnodes << " vs " << num_points << std::endl; return -1; @@ -946,7 +946,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons this->frozen_location = file_frozen_id; if (this->num_frozen_points == 1) { - diskann::cout << " Detected frozen point in index at location " << this->frozen_location + std::cout << " Detected frozen point in index at location " << this->frozen_location << ". Will not output it at search time." << std::endl; } @@ -964,10 +964,10 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons READ_U64(index_metadata, this->nvecs_per_sector); } - diskann::cout << "Disk-Index File Meta-data: "; - diskann::cout << "# nodes per sector: " << nnodes_per_sector; - diskann::cout << ", max node len (bytes): " << max_node_len; - diskann::cout << ", max node degree: " << max_degree << std::endl; + std::cout << "Disk-Index File Meta-data: "; + std::cout << "# nodes per sector: " << nnodes_per_sector; + std::cout << ", max node len (bytes): " << max_node_len; + std::cout << ", max node degree: " << max_degree << std::endl; #ifdef EXEC_ENV_OLS delete[] bytes; @@ -1011,7 +1011,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons if (!file_exists(centroids_file)) { #endif - diskann::cout << "Centroid data file not found. Using corresponding vectors " + std::cout << "Centroid data file not found. 
Using corresponding vectors " "for the medoids " << std::endl; use_medoids_data_as_centroids(); @@ -1033,7 +1033,7 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons "m times data_dim vector of float, where m is number of " "medoids " "in medoids file."; - diskann::cerr << stream.str() << std::endl; + std::cerr << stream.str() << std::endl; throw diskann::ANNException(stream.str(), -1, __FUNCSIG__, __FILE__, __LINE__); } } @@ -1054,10 +1054,10 @@ int PQFlashIndex::load_from_separate_paths(uint32_t num_threads, cons float *norm_val; diskann::load_bin(norm_file, norm_val, dumr, dumc); this->max_base_norm = norm_val[0]; - diskann::cout << "Setting re-scaling factor of base vectors to " << this->max_base_norm << std::endl; + std::cout << "Setting re-scaling factor of base vectors to " << this->max_base_norm << std::endl; delete[] norm_val; } - diskann::cout << "done.." << std::endl; + std::cout << "done.." << std::endl; return 0; } diff --git a/src/utils.cpp b/src/utils.cpp index b675e656d..be031664d 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -108,12 +108,12 @@ void normalize_data_file(const std::string &inFileName, const std::string &outFi size_t npts = (size_t)npts_s32; size_t ndims = (size_t)ndims_s32; - diskann::cout << "Normalizing FLOAT vectors in file: " << inFileName << std::endl; - diskann::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; + std::cout << "Normalizing FLOAT vectors in file: " << inFileName << std::endl; + std::cout << "Dataset: #pts = " << npts << ", # dims = " << ndims << std::endl; size_t blk_size = 131072; size_t nblks = ROUND_UP(npts, blk_size) / blk_size; - diskann::cout << "# blks: " << nblks << std::endl; + std::cout << "# blks: " << nblks << std::endl; float *read_buf = new float[npts * ndims]; for (size_t i = 0; i < nblks; i++) @@ -123,7 +123,7 @@ void normalize_data_file(const std::string &inFileName, const std::string &outFi } delete[] read_buf; - diskann::cout << "Wrote 
normalized points to file: " << outFileName << std::endl; + std::cout << "Wrote normalized points to file: " << outFileName << std::endl; } double calculate_recall(uint32_t num_queries, uint32_t *gold_std, float *gs_dist, uint32_t dim_gs, @@ -193,7 +193,7 @@ double calculate_recall(uint32_t num_queries, uint32_t *gold_std, float *gs_dist if ((active_points_count < recall_at && !active_tags.empty()) && !printed) { - diskann::cout << "Warning: Couldn't find enough closest neighbors " << active_points_count << "/" + std::cout << "Warning: Couldn't find enough closest neighbors " << active_points_count << "/" << recall_at << " from " "truthset for query # " @@ -270,7 +270,7 @@ void get_bin_metadata(AlignedFileReader &reader, size_t &npts, size_t &ndim, siz { npts = buf[0]; ndim = buf[1]; - diskann::cout << "File has: " << npts << " points, " << ndim << " dimensions at offset: " << offset + std::cout << "File has: " << npts << " points, " << ndim << " dimensions at offset: " << offset << std::endl; } else @@ -333,7 +333,7 @@ void copy_aligned_data_from_file(AlignedFileReader &reader, T *&data, size_t &np { if (data == nullptr) { - diskann::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." + std::cerr << "Memory was not allocated for " << data << " before calling the load function. Exiting..." 
<< std::endl; throw diskann::ANNException("Null pointer passed to copy_aligned_data_from_file()", -1, __FUNCSIG__, __FILE__, __LINE__); diff --git a/src/windows_aligned_file_reader.cpp b/src/windows_aligned_file_reader.cpp index 3650b928a..06646a207 100644 --- a/src/windows_aligned_file_reader.cpp +++ b/src/windows_aligned_file_reader.cpp @@ -35,7 +35,7 @@ void WindowsAlignedFileReader::register_thread() std::unique_lock lk(this->ctx_mut); if (this->ctx_map.find(std::this_thread::get_id()) != ctx_map.end()) { - diskann::cout << "Warning:: Duplicate registration for thread_id : " << std::this_thread::get_id() << std::endl; + std::cout << "Warning:: Duplicate registration for thread_id : " << std::this_thread::get_id() << std::endl; } IOContext ctx; @@ -49,11 +49,11 @@ void WindowsAlignedFileReader::register_thread() char filePath[c_max_filepath_len]; if (wcstombs_s(&actual_len, filePath, c_max_filepath_len, m_filename.c_str(), m_filename.length()) == 0) { - diskann::cout << "Error opening " << filePath << " -- error=" << GetLastError() << std::endl; + std::cout << "Error opening " << filePath << " -- error=" << GetLastError() << std::endl; } else { - diskann::cout << "Error converting wchar to char -- error=" << GetLastError() << std::endl; + std::cout << "Error converting wchar to char -- error=" << GetLastError() << std::endl; } } @@ -102,7 +102,7 @@ void WindowsAlignedFileReader::read(std::vector &read_reqs, IOConte /* if (ResetEvent(os.hEvent) == 0) { - diskann::cerr << "ResetEvent failed" << std::endl; + std::cerr << "ResetEvent failed" << std::endl; exit(-3); } */ @@ -135,12 +135,12 @@ void WindowsAlignedFileReader::read(std::vector &read_reqs, IOConte auto error = GetLastError(); if (error != ERROR_IO_PENDING) { - diskann::cerr << "Error queuing IO -- " << error << "\n"; + std::cerr << "Error queuing IO -- " << error << "\n"; } } else { - diskann::cerr << "Error queueing IO -- ReadFile returned TRUE" << std::endl; + std::cerr << "Error queueing IO -- ReadFile 
returned TRUE" << std::endl; } } DWORD n_read = 0; @@ -162,7 +162,7 @@ void WindowsAlignedFileReader::read(std::vector &read_reqs, IOConte DWORD error = GetLastError(); if (error != WAIT_TIMEOUT) { - diskann::cerr << "GetQueuedCompletionStatus() failed " + std::cerr << "GetQueuedCompletionStatus() failed " "with error = " << error << std::endl; throw diskann::ANNException("GetQueuedCompletionStatus failed with error: ", error, __FUNCSIG__,