Skip to content

Commit

Permalink
Improve help formatting in CLI tools (#390)
Browse files Browse the repository at this point in the history
* Added utilities to standardize help across cli tools.  #370

* Made three option groupings (required/optional/print)

* Moved common parameter descriptions to a common file.  #370

* Updated usage statement for search_disk_app #370

* Updated range_search_disk_index to use the new required/optional format.  #370

* Updated test apps to use the new help format.  #370

* Fixed format issue.  #370

* Updated help format for the 'build' apps. #370

* Fixed code formatting.  #370

* Added src/*.hpp to the clang format.  #370

* Moved header into the headers directory.  #370

* Added missing configs.  #370

* Removed superflous paths from include.  #370

* Added #pragma once.  #370

* Type-o fixes.  #370

* Fixed capitolization of constant.  #370

* Make fail_if_recall description more accurate.  #370

* Changed to using set notation.  #370

* Better explanations for some options.  #370

* Added short explanation of file format.  #370

---------

Co-authored-by: Jon McLean <none@example.com>
Co-authored-by: Jonathan McLean <Jonathan.McLean@microsoft.com>
  • Loading branch information
3 people committed Jul 19, 2023
1 parent f636da4 commit 2c9912a
Show file tree
Hide file tree
Showing 10 changed files with 457 additions and 285 deletions.
109 changes: 57 additions & 52 deletions apps/build_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "math_utils.h"
#include "index.h"
#include "partition.h"
#include "program_options_utils.hpp"

namespace po = boost::program_options;

Expand All @@ -21,61 +22,65 @@ int main(int argc, char **argv)
bool append_reorder_data = false;
bool use_opq = false;

po::options_description desc{"Arguments"};
po::options_description desc{
program_options_utils::make_program_description("build_disk_index", "Build a disk-based index.")};
try
{
desc.add_options()("help,h", "Print information on arguments");
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(), "distance function <l2/mips>");
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
"Input data file in bin format");
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
"Path prefix for saving index file components");
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
"Build complexity, higher value results in better graphs");
desc.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
"DRAM budget in GB for searching the index to set the "
"compressed level for data while search happens");
desc.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
"DRAM budget in GB for building the index");
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
"Number of threads used for building index (defaults to "
"omp_get_num_procs())");
desc.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0), " Quantized Dimension for compression");
desc.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
"Path prefix for pre-trained codebook");
desc.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
"Number of bytes to which vectors should be compressed "
"on SSD; 0 for no compression");
desc.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
"Include full precision data in the index. Use only in "
"conjuction with compressed data on SSD.");
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
"Number of PQ bytes to build the index; 0 for full "
"precision build");
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
"Use Optimized Product Quantization (OPQ).");
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
"Input label file in txt format for Filtered Index build ."
"The file should contain comma separated filters for each node "
"with each line corresponding to a graph node");
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
"Universal label, Use only in conjuction with label file for "
"filtered "
"index build. If a graph node has all the labels against it, we "
"can "
"assign a special universal filter to the point instead of comma "
"separated filters for that point");
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
"Build complexity for filtered points, higher value "
"results in better graphs");
desc.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
"Threshold to break up the existing nodes to generate new graph "
"internally where each node has a maximum F labels.");
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
"Storage type of Labels <uint/ushort>, default value is uint which "
"will consume memory 4 bytes per filter");

// Required parameters
po::options_description required_configs("Required");
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
program_options_utils::DATA_TYPE_DESCRIPTION);
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
program_options_utils::INPUT_DATA_PATH);
required_configs.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
"DRAM budget in GB for searching the index to set the "
"compressed level for data while search happens");
required_configs.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
"DRAM budget in GB for building the index");

// Optional parameters
po::options_description optional_configs("Optional");
optional_configs.add_options()("num_threads,T",
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
program_options_utils::NUMBER_THREADS_DESCRIPTION);
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
program_options_utils::MAX_BUILD_DEGREE);
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
program_options_utils::GRAPH_BUILD_COMPLEXITY);
optional_configs.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0),
" Quantized Dimension for compression");
optional_configs.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
"Path prefix for pre-trained codebook");
optional_configs.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
"Number of bytes to which vectors should be compressed "
"on SSD; 0 for no compression");
optional_configs.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
"Include full precision data in the index. Use only in "
"conjuction with compressed data on SSD.");
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
program_options_utils::USE_OPQ);
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
program_options_utils::LABEL_FILE);
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
program_options_utils::UNIVERSAL_LABEL);
optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
program_options_utils::FILTERED_LBUILD);
optional_configs.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
"Threshold to break up the existing nodes to generate new graph "
"internally where each node has a maximum F labels.");
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
program_options_utils::LABEL_TYPE_DESCRIPTION);

// Merge required and optional parameters
desc.add(required_configs).add(optional_configs);

po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
Expand Down
80 changes: 42 additions & 38 deletions apps/build_memory_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "index.h"
#include "utils.h"
#include "program_options_utils.hpp"

#ifndef _WINDOWS
#include <sys/mman.h>
Expand Down Expand Up @@ -72,47 +73,50 @@ int main(int argc, char **argv)
float alpha;
bool use_pq_build, use_opq;

po::options_description desc{"Arguments"};
po::options_description desc{
program_options_utils::make_program_description("build_memory_index", "Build a memory-based DiskANN index.")};
try
{
desc.add_options()("help,h", "Print information on arguments");
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
"distance function <l2/mips/cosine>");
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
"Input data file in bin format");
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
"Path prefix for saving index file components");
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
"Build complexity, higher value results in better graphs");
desc.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
"alpha controls density and diameter of graph, set "
"1 for sparse graph, "
"1.2 or 1.4 for denser graphs with lower diameter");
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
"Number of threads used for building index (defaults to "
"omp_get_num_procs())");
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
"Number of PQ bytes to build the index; 0 for full precision "
"build");
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
"Set true for OPQ compression while using PQ "
"distance comparisons for "
"building the index, and false for PQ compression");
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
"Input label file in txt format for Filtered Index search. "
"The file should contain comma separated filters for each node "
"with each line corresponding to a graph node");
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
"Universal label, if using it, only in conjunction with "
"labels_file");
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
"Build complexity for filtered points, higher value "
"results in better graphs");
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
"Storage type of Labels <uint/ushort>, default value is uint which "
"will consume memory 4 bytes per filter");

// Required parameters
po::options_description required_configs("Required");
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
program_options_utils::DATA_TYPE_DESCRIPTION);
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
program_options_utils::INPUT_DATA_PATH);

// Optional parameters
po::options_description optional_configs("Optional");
optional_configs.add_options()("num_threads,T",
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
program_options_utils::NUMBER_THREADS_DESCRIPTION);
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
program_options_utils::MAX_BUILD_DEGREE);
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
program_options_utils::GRAPH_BUILD_COMPLEXITY);
optional_configs.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
program_options_utils::GRAPH_BUILD_ALPHA);
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
program_options_utils::USE_OPQ);
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
program_options_utils::LABEL_FILE);
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
program_options_utils::UNIVERSAL_LABEL);

optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
program_options_utils::FILTERED_LBUILD);
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
program_options_utils::LABEL_TYPE_DESCRIPTION);

// Merge required and optional parameters
desc.add(required_configs).add(optional_configs);

po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
Expand Down
Loading

0 comments on commit 2c9912a

Please sign in to comment.