Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/optimize single prediction #2992

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 128 additions & 1 deletion include/LightGBM/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

typedef void* DatasetHandle; /*!< \brief Handle of dataset. */
typedef void* BoosterHandle; /*!< \brief Handle of booster. */
typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */

#define C_API_DTYPE_FLOAT32 (0) /*!< \brief float32 (single precision float). */
#define C_API_DTYPE_FLOAT64 (1) /*!< \brief float64 (double precision float). */
Expand Down Expand Up @@ -577,7 +578,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle,
* \param len Number of ``char*`` pointers stored at ``out_strs``.
* If smaller than the max size, only this many strings are copied
* \param[out] out_len Total number of evaluation datasets
* \param buffer_len Size of pre-allocated strings.
* \param buffer_len Size of pre-allocated strings.
* Content is copied up to ``buffer_len - 1`` and null-terminated
* \param[out] out_buffer_len String sizes required to do the full string copies
* \param[out] out_strs Names of evaluation datasets, should pre-allocate memory
Expand Down Expand Up @@ -703,6 +704,14 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
int num_iteration,
int64_t* out_len);

/*!
* \brief Release FastConfig object.
*
* Only the FastConfig itself is destroyed; the Booster it was created from is not freed.
* The handle must not be used again after this call.
*
* \param fastConfig Handle to the FastConfig object acquired with a ``*FastInit()`` method.
* \return 0 when it succeeds, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_FastConfigFree(FastConfigHandle fastConfig);

/*!
* \brief Make prediction for a new dataset in CSR format.
* \note
Expand Down Expand Up @@ -844,6 +853,73 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle,
int64_t* out_len,
double* out_result);

/*!
* \brief Initialize and return a ``FastConfigHandle`` for use with ``LGBM_BoosterPredictForCSRSingleRowFast``.
*
* Release the ``FastConfig`` by passing its handle to ``LGBM_FastConfigFree`` when no longer needed.
*
* \note
* The ``FastConfig`` keeps a pointer to the Booster (it is not copied), so the Booster
* must stay alive for as long as the returned handle is used.
*
* \note
* If ``parameter`` sets ``num_threads`` to a positive value, the OpenMP thread count is set once, here.
*
* \param handle Booster handle
* \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64``
* \param num_col Number of columns, must be greater than zero and smaller than ``INT32_MAX``
* \param parameter Other parameters for prediction, e.g. early stopping for prediction
* \param[out] out_fastConfig FastConfig object with which you can call ``LGBM_BoosterPredictForCSRSingleRowFast``
* \return 0 when it succeeds, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle,
const int data_type,
const int64_t num_col,
const char* parameter,
FastConfigHandle *out_fastConfig);

/*!
* \brief Faster variant of ``LGBM_BoosterPredictForCSRSingleRow``.
*
* Score single rows after setup with ``LGBM_BoosterPredictForCSRSingleRowFastInit``.
*
* By removing the setup steps from this call extra optimizations can be made like
* initializing the config only once, instead of once per call.
*
* The Booster, the element type of ``data``, the number of columns and the prediction
* parameters are all taken from the ``FastConfig``; they cannot be changed per call.
*
* \note
* Setting up the number of threads is only done once at ``LGBM_BoosterPredictForCSRSingleRowFastInit``
* instead of at each prediction.
* If you use a different number of threads in other calls, you need to start the setup process over,
* or that number of threads will be used for these calls as well.
*
* \note
* You should pre-allocate memory for ``out_result``:
* - for normal and raw score, its length is equal to ``num_class * num_data``;
* - for leaf index, its length is equal to ``num_class * num_data * num_iteration``;
* - for feature contributions, its length is equal to ``num_class * num_data * (num_feature + 1)``.
*
* \param fastConfig_handle FastConfig object handle returned by ``LGBM_BoosterPredictForCSRSingleRowFastInit``
* \param indptr Pointer to row headers
* \param indptr_type Type of ``indptr``, can be ``C_API_DTYPE_INT32`` or ``C_API_DTYPE_INT64``
* \param indices Pointer to column indices
* \param data Pointer to the data space, of the ``data_type`` given at init
* \param nindptr Number of rows in the matrix + 1
* \param nelem Number of nonzero elements in the matrix
* \param predict_type What should be predicted
* - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed);
* - ``C_API_PREDICT_RAW_SCORE``: raw score;
* - ``C_API_PREDICT_LEAF_INDEX``: leaf index;
* - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values)
* \param num_iteration Number of iterations for prediction, <= 0 means no limit
* \param[out] out_len Length of output result
* \param[out] out_result Pointer to array with predictions
* \return 0 when it succeeds, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle,
const void* indptr,
int indptr_type,
const int32_t* indices,
const void* data,
int64_t nindptr,
int64_t nelem,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result);

/*!
* \brief Make prediction for a new dataset in CSC format.
* \note
Expand Down Expand Up @@ -957,6 +1033,57 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
int64_t* out_len,
double* out_result);

/*!
* \brief Initialize and return a ``FastConfigHandle`` for use with ``LGBM_BoosterPredictForMatSingleRowFast``.
*
* Release the ``FastConfig`` by passing its handle to ``LGBM_FastConfigFree`` when no longer needed.
*
* \note
* The ``FastConfig`` keeps a pointer to the Booster (it is not copied), so the Booster
* must stay alive for as long as the returned handle is used.
*
* \note
* If ``parameter`` sets ``num_threads`` to a positive value, the OpenMP thread count is set once, here.
*
* \param handle Booster handle
* \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64``
* \param ncol Number of columns
* \param parameter Other parameters for prediction, e.g. early stopping for prediction
* \param[out] out_fastConfig FastConfig object with which you can call ``LGBM_BoosterPredictForMatSingleRowFast``
* \return 0 when it succeeds, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle,
int data_type,
int32_t ncol,
const char* parameter,
FastConfigHandle *out_fastConfig);

/*!
* \brief Faster variant of ``LGBM_BoosterPredictForMatSingleRow``.
*
* Score a single row after setup with ``LGBM_BoosterPredictForMatSingleRowFastInit``.
*
* By removing the setup steps from this call extra optimizations can be made like
* initializing the config only once, instead of once per call.
*
* The Booster, the element type of ``data``, the number of columns and the prediction
* parameters are all taken from the ``FastConfig``; they cannot be changed per call.
*
* \note
* Setting up the number of threads is only done once at ``LGBM_BoosterPredictForMatSingleRowFastInit``
* instead of at each prediction.
* If you use a different number of threads in other calls, you need to start the setup process over,
* or that number of threads will be used for these calls as well.
*
* \param fastConfig_handle FastConfig object handle returned by ``LGBM_BoosterPredictForMatSingleRowFastInit``
* \param data Pointer to a single row holding ``ncol`` values of the ``data_type`` given at init
*             (a single row is always read in row-major form).
* \param predict_type What should be predicted
* - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed);
* - ``C_API_PREDICT_RAW_SCORE``: raw score;
* - ``C_API_PREDICT_LEAF_INDEX``: leaf index;
* - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values)
* \param num_iteration Number of iterations for prediction, <= 0 means no limit
* \param[out] out_len Length of output result
* \param[out] out_result Pointer to array with predictions
* \return 0 when it succeeds, -1 when failure happens
*/
LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle,
const void* data,
int predict_type,
int num_iteration,
int64_t* out_len,
double* out_result);

/*!
* \brief Make prediction for a new dataset presented in a form of array of pointers to rows.
* \note
Expand Down
111 changes: 111 additions & 0 deletions src/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1735,6 +1735,36 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
API_END();
}

/*!
 * \brief Settings shared by every call of the single-row *Fast* predict methods.
 *
 * A FastConfig is built once by a ``*FastInit()`` function and then handed to each
 * ``*Fast`` prediction, so the prediction parameters are parsed a single time
 * instead of once per scored row.
 *
 * The Booster is only referenced: destroying a FastConfig leaves the Booster untouched.
 */
struct FastConfig {
  /*!
   * \param model Booster that will score the rows (not owned)
   * \param parameter Prediction parameters in string form, parsed into ``config``
   * \param dtype Element type of the row data passed at prediction time
   * \param column_count Number of columns of each row
   */
  FastConfig(Booster* const model,
             const char* parameter,
             const int dtype,
             const int32_t column_count)
      : booster(model),
        data_type(dtype),
        ncol(column_count) {
    config.Set(Config::Str2Map(parameter));
  }

  /*! \brief Booster used for prediction (non-owning) */
  Booster* const booster;
  /*! \brief Prediction parameters parsed from the ``parameter`` string */
  Config config;
  /*! \brief Element type of the row data */
  const int data_type;
  /*! \brief Number of columns per row */
  const int32_t ncol;
};

/*! \brief Destroys the FastConfig behind ``fastConfig``; the Booster it points to is left alone. */
int LGBM_FastConfigFree(FastConfigHandle fastConfig) {
  API_BEGIN();
  FastConfig* const doomed = reinterpret_cast<FastConfig*>(fastConfig);
  delete doomed;
  API_END();
}

int LGBM_BoosterPredictForCSR(BoosterHandle handle,
const void* indptr,
int indptr_type,
Expand Down Expand Up @@ -1886,6 +1916,51 @@ int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle,
API_END();
}

/*!
 * \brief Builds the FastConfig used by ``LGBM_BoosterPredictForCSRSingleRowFast``.
 *
 * Validates ``num_col``, parses ``parameter`` once, applies ``num_threads`` to OpenMP
 * when it is positive, and hands ownership of the new FastConfig to the caller
 * through ``out_fastConfig``.
 */
int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle,
                                               const int data_type,
                                               const int64_t num_col,
                                               const char* parameter,
                                               FastConfigHandle *out_fastConfig) {
  API_BEGIN();
  // FastConfig stores the column count as int32_t, so only values that
  // survive the narrowing cast below are accepted.
  if (num_col <= 0) {
    Log::Fatal("The number of columns should be greater than zero.");
  }
  if (num_col >= INT32_MAX) {
    Log::Fatal("The number of columns should be smaller than INT32_MAX.");
  }

  const int32_t column_count = static_cast<int32_t>(num_col);
  Booster* const model = reinterpret_cast<Booster*>(handle);
  // Held by unique_ptr until everything that can fail is done.
  std::unique_ptr<FastConfig> holder(new FastConfig(model, parameter, data_type, column_count));

  const auto requested_threads = holder->config.num_threads;
  if (requested_threads > 0) {
    omp_set_num_threads(requested_threads);
  }

  *out_fastConfig = holder.release();
  API_END();
}

/*!
 * \brief Scores one CSR row with the Booster and settings stored in the FastConfig.
 *
 * The element type of ``data``, the column count and the prediction config all come
 * from ``fastConfig_handle``; only the row contents and prediction kind vary per call.
 */
int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle,
                                           const void* indptr,
                                           int indptr_type,
                                           const int32_t* indices,
                                           const void* data,
                                           int64_t nindptr,
                                           int64_t nelem,
                                           int predict_type,
                                           int num_iteration,
                                           int64_t* out_len,
                                           double* out_result) {
  API_BEGIN();
  FastConfig* cfg = reinterpret_cast<FastConfig*>(fastConfig_handle);
  auto row_reader = RowFunctionFromCSR<int>(indptr, indptr_type, indices, data,
                                            cfg->data_type, nindptr, nelem);
  Booster* const model = cfg->booster;
  model->PredictSingleRow(num_iteration, predict_type, cfg->ncol,
                          row_reader, cfg->config, out_result, out_len);
  API_END();
}


int LGBM_BoosterPredictForCSC(BoosterHandle handle,
const void* col_ptr,
Expand Down Expand Up @@ -1983,6 +2058,42 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
API_END();
}

/*!
 * \brief Builds the FastConfig used by ``LGBM_BoosterPredictForMatSingleRowFast``.
 *
 * Parses ``parameter`` once, applies ``num_threads`` to OpenMP when it is positive,
 * and hands ownership of the new FastConfig to the caller through ``out_fastConfig``.
 */
int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle,
                                               const int data_type,
                                               const int32_t ncol,
                                               const char* parameter,
                                               FastConfigHandle *out_fastConfig) {
  API_BEGIN();
  Booster* const model = reinterpret_cast<Booster*>(handle);
  // Held by unique_ptr until everything that can fail is done.
  std::unique_ptr<FastConfig> holder(new FastConfig(model, parameter, data_type, ncol));

  const auto requested_threads = holder->config.num_threads;
  if (requested_threads > 0) {
    omp_set_num_threads(requested_threads);
  }

  *out_fastConfig = holder.release();
  API_END();
}

/*!
 * \brief Scores one dense row with the Booster and settings stored in the FastConfig.
 *
 * ``data`` is read as a 1 x ncol row-major matrix whose element type and width
 * were fixed when the FastConfig was created.
 */
int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle,
                                           const void* data,
                                           const int predict_type,
                                           const int num_iteration,
                                           int64_t* out_len,
                                           double* out_result) {
  API_BEGIN();
  FastConfig* cfg = reinterpret_cast<FastConfig*>(fastConfig_handle);
  // One row, row-major layout.
  auto row_reader = RowPairFunctionFromDenseMatric(data, 1, cfg->ncol, cfg->data_type, 1);
  Booster* const model = cfg->booster;
  model->PredictSingleRow(num_iteration, predict_type, cfg->ncol,
                          row_reader, cfg->config,
                          out_result, out_len);
  API_END();
}


int LGBM_BoosterPredictForMats(BoosterHandle handle,
const void** data,
Expand Down
70 changes: 70 additions & 0 deletions swig/lightgbmlib.i
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,32 @@
return ret;
}

/*! \brief Even faster variant of `LGBM_BoosterPredictForMatSingle`.
 *
 * Uses `LGBM_BoosterPredictForMatSingleRowFast` which is faster
 * than `LGBM_BoosterPredictForMatSingleRow` and the trick of
 * `LGBM_BoosterPredictForMatSingle` to capture the Java data array
 * using `GetPrimitiveArrayCritical`, which can yield faster access
 * to the array if the JVM passes the actual address to the C++ side
 * instead of performing a copy.
 *
 * \param jenv JNI environment of the calling thread
 * \param data Java array holding the single row to score
 * \param handle FastConfig handle obtained from `LGBM_BoosterPredictForMatSingleRowFastInit`
 * \param predict_type What should be predicted (forwarded unchanged)
 * \param num_iteration Number of iterations for prediction (forwarded unchanged)
 * \param[out] out_len Length of output result
 * \param[out] out_result Pointer to array with predictions
 * \return Result of `LGBM_BoosterPredictForMatSingleRowFast`,
 *         or -1 if the Java array could not be accessed
 */
int LGBM_BoosterPredictForMatSingleRowFastCriticalSWIG(JNIEnv *jenv,
                                                       jdoubleArray data,
                                                       FastConfigHandle handle,
                                                       int predict_type,
                                                       int num_iteration,
                                                       int64_t* out_len,
                                                       double* out_result) {
  double* data0 = (double*)jenv->GetPrimitiveArrayCritical(data, 0);
  if (!data0) {
    // Array access failed: report failure instead of handing a null row to the predictor.
    return -1;
  }

  int ret = LGBM_BoosterPredictForMatSingleRowFast(handle, data0, predict_type,
                                                   num_iteration, out_len, out_result);

  // The row was only read, so JNI_ABORT releases it without copying anything back.
  jenv->ReleasePrimitiveArrayCritical(data, data0, JNI_ABORT);

  return ret;
}

int LGBM_BoosterPredictForCSRSingle(JNIEnv *jenv,
jintArray indices,
jdoubleArray values,
Expand Down Expand Up @@ -130,6 +156,50 @@
return ret;
}

/*! \brief Even faster variant of `LGBM_BoosterPredictForCSRSingle`.
 *
 * Uses `LGBM_BoosterPredictForCSRSingleRowFast` which is faster
 * than `LGBM_BoosterPredictForCSRSingleRow` and the trick of
 * `LGBM_BoosterPredictForCSRSingle` to capture the Java data array
 * using `GetPrimitiveArrayCritical`, which can yield faster access
 * to the array if the JVM passes the actual address to the C++ side
 * instead of performing a copy.
 *
 * The data type, number of columns and prediction parameters are not passed here:
 * they are part of the FastConfig created by `LGBM_BoosterPredictForCSRSingleRowFastInit`.
 *
 * \param jenv JNI environment of the calling thread
 * \param indices Java array with the column indices of the row's non-zero values
 * \param values Java array with the row's non-zero values
 * \param numNonZeros Number of non-zero entries, used as the end of the single row
 * \param handle FastConfig handle obtained from `LGBM_BoosterPredictForCSRSingleRowFastInit`
 * \param indptr_type Type tag forwarded for the row-pointer array built by this function
 * \param nelem Number of non-zero elements (forwarded unchanged)
 * \param predict_type What should be predicted (forwarded unchanged)
 * \param num_iteration Number of iterations for prediction (forwarded unchanged)
 * \param[out] out_len Length of output result
 * \param[out] out_result Pointer to array with predictions
 * \return Result of `LGBM_BoosterPredictForCSRSingleRowFast`,
 *         or -1 if one of the Java arrays could not be accessed
 */
int LGBM_BoosterPredictForCSRSingleRowFastCriticalSWIG(JNIEnv *jenv,
                                                       jintArray indices,
                                                       jdoubleArray values,
                                                       int numNonZeros,
                                                       FastConfigHandle handle,
                                                       int indptr_type,
                                                       int64_t nelem,
                                                       int predict_type,
                                                       int num_iteration,
                                                       int64_t* out_len,
                                                       double* out_result) {
  // Alternatives
  // - GetIntArrayElements: performs copy
  // - GetDirectBufferAddress: fails on wrapped array
  // Some words of warning for GetPrimitiveArrayCritical
  // https://stackoverflow.com/questions/23258357/whats-the-trade-off-between-using-getprimitivearraycritical-and-getprimitivety

  int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, 0);
  if (!indices0) {
    // Array access failed: nothing was acquired, so there is nothing to release.
    return -1;
  }

  double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, 0);
  if (!values0) {
    // Give back the array that was already acquired before reporting failure.
    jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT);
    return -1;
  }

  // Row pointers of a one-row CSR matrix: the row spans [0, numNonZeros).
  // NOTE(review): this array is always int32_t, so indptr_type is presumably
  // expected to be C_API_DTYPE_INT32 - confirm against callers.
  int32_t ind[2] = { 0, numNonZeros };

  int ret = LGBM_BoosterPredictForCSRSingleRowFast(handle, ind, indptr_type, indices0, values0, 2,
                                                   nelem, predict_type, num_iteration, out_len, out_result);

  // Both arrays were only read, so JNI_ABORT releases them without copying anything back.
  jenv->ReleasePrimitiveArrayCritical(values, values0, JNI_ABORT);
  jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT);

  return ret;
}

#include <functional>
#include <vector>

Expand Down