diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 626065755364..7771ecd3b978 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -693,6 +693,14 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterCalcNumPredict(BoosterHandle handle, int num_iteration, int64_t* out_len); +/*! + * \brief Release FastConfig object. + * + * \param fastConfig Handle to the FastConfig object acquired with a `*FastInit()` method. + * \return 0 when it succeeds, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_FastConfigFree(FastConfigHandle fastConfig); + /*! * \brief Make prediction for a new dataset in CSR format. * \note @@ -778,6 +786,73 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle, int64_t* out_len, double* out_result); +/*! + * \brief Initialize and return a `FastConfigHandle` for use with `LGBM_BoosterPredictForCSRSingleRowFast`. + * + * Release the `FastConfig` by passing its handle to `LGBM_FastConfigFree` when no longer needed. + * + * \param handle Booster handle + * \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64`` + * \param num_col Number of columns, must be > 0 and < INT32_MAX + * \param parameter Other parameters for prediction, e.g. early stopping for prediction + * \param[out] out_fastConfig FastConfig object with which you can call `LGBM_BoosterPredictForCSRSingleRowFast` + * \return 0 when it succeeds, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle, + const int data_type, + const int64_t num_col, + const char* parameter, + FastConfigHandle *out_fastConfig); + +/*! + * \brief Faster variant of `LGBM_BoosterPredictForCSRSingleRow`. + * + * Score single rows after setup with `LGBM_BoosterPredictForCSRSingleRowFastInit`. + * + * By removing the setup steps from this call extra optimizations can be made like + * initializing the config only once, instead of once per call. 
+ * + * \note + * Setting up #threads is only done once at `LGBM_BoosterPredictForCSRSingleRowFastInit` + * instead of at each prediction. + * If you use a different #threads in other calls, you need to start the setup process over, + * or that number of threads will be used for these calls as well. + * + * \note + * You should pre-allocate memory for ``out_result``: + * - for normal and raw score, its length is equal to ``num_class * num_data``; + * - for leaf index, its length is equal to ``num_class * num_data * num_iteration``; + * - for feature contributions, its length is equal to ``num_class * num_data * (num_feature + 1)``. + * + * \param fastConfig_handle FastConfig object handle returned by `LGBM_BoosterPredictForCSRSingleRowFastInit` + * \param indptr Pointer to row headers + * \param indptr_type Type of ``indptr``, can be ``C_API_DTYPE_INT32`` or ``C_API_DTYPE_INT64`` + * \param indices Pointer to column indices + * \param data Pointer to the data space + * \param nindptr Number of rows in the matrix + 1 + * \param nelem Number of nonzero elements in the matrix + * \param predict_type What should be predicted + * - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed); + * - ``C_API_PREDICT_RAW_SCORE``: raw score; + * - ``C_API_PREDICT_LEAF_INDEX``: leaf index; + * - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values) + * \param num_iteration Number of iterations for prediction, <= 0 means no limit + * \param[out] out_len Length of output result + * \param[out] out_result Pointer to array with predictions + * \return 0 when it succeeds, -1 when failure happens + */ +LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle, + const void* indptr, + int indptr_type, + const int32_t* indices, + const void* data, + int64_t nindptr, + int64_t nelem, + int predict_type, + int num_iteration, + int64_t* out_len, + double* out_result); + /*! * \brief Make prediction for a new dataset in CSC format. 
* \note @@ -891,14 +966,6 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle, int64_t* out_len, double* out_result); -/*! - * \brief Release FastConfig object. - * - * \param fastConfig Handle to the FastConfig object acquired with a `*FastInit()` method. - * \return LIGHTGBM_C_EXPORT LGBM_FastConfigFree - */ -LIGHTGBM_C_EXPORT int LGBM_FastConfigFree(FastConfigHandle fastConfig); - /*! * \brief Initialize and return a `FastConfigHandle` for use with `LGBM_BoosterPredictForMatSingleRowFast`. * @@ -918,7 +985,18 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle h FastConfigHandle *out_fastConfig); /*! - * \brief Score a single row after setup with `LGBM_BoosterPredictForMatSingleRowFastInit`. + * \brief Faster variant of `LGBM_BoosterPredictForMatSingleRow`. + * + * Score a single row after setup with `LGBM_BoosterPredictForMatSingleRowFastInit`. + * + * By removing the setup steps from this call extra optimizations can be made like + * initializing the config only once, instead of once per call. + * + * \note + * Setting up #threads is only done once at `LGBM_BoosterPredictForMatSingleRowFastInit` + * instead of at each prediction. + * If you use a different #threads in other calls, you need to start the setup process over, + * or that number of threads will be used for these calls as well. * * \param fastConfig_handle FastConfig object handle returned by `LGBM_BoosterPredictForMatSingleRowFastInit` * \param data Single-row array data (no other way than row-major form). diff --git a/src/c_api.cpp b/src/c_api.cpp index 8073941505cf..ac11a619030e 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -1485,6 +1485,56 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle, API_END(); } +/*! + * \brief Union to hold different int type values. + * + * Introduced with FastConfig to support multiple num_col types + * that show up in the rest of the C API prediction methods. 
+ */ +union IntUnion { + int32_t int32; + int64_t int64; +}; + +/*! + * \brief Object to store resources meant for single-row Fast Predict methods. + * + * Meant to be used as a basic struct by the *Fast* predict methods only. + * It stores the configuration resources for reuse during prediction. + * + * The row function is not stored here: each *Fast* predict call rebuilds it + * from the data pointers it receives. Only the booster pointer, the parsed + * config, the data type and the number of columns are kept for reuse. + */ +struct FastConfig { + FastConfig(Booster *const booster_ptr, + const char *parameter, + const int data_type_, + const int32_t num_cols) : booster(booster_ptr), data_type(data_type_) { + ncol.int32 = num_cols; + config.Set(Config::Str2Map(parameter)); + } + + FastConfig(Booster *const booster_ptr, + const char *parameter, + const int data_type_, + const int64_t num_cols) : booster(booster_ptr), data_type(data_type_) { + ncol.int64 = num_cols; + config.Set(Config::Str2Map(parameter)); + } + + Booster* const booster; + Config config; + const int data_type; + IntUnion ncol; +}; + +int LGBM_FastConfigFree(FastConfigHandle fastConfig) { + API_BEGIN(); + delete reinterpret_cast(fastConfig); + API_END(); +} + int LGBM_BoosterPredictForCSR(BoosterHandle handle, const void* indptr, int indptr_type, @@ -1551,6 +1601,51 @@ int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle, API_END(); } +int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle, + const int data_type, + const int64_t num_col, + const char* parameter, + FastConfigHandle *out_fastConfig) { + API_BEGIN(); + if (num_col <= 0) { + Log::Fatal("The number of columns should be greater than zero."); + } else if (num_col >= INT32_MAX) { + Log::Fatal("The number of columns should be smaller than INT32_MAX."); + } + + auto fastConfig_ptr = std::unique_ptr(new FastConfig( + reinterpret_cast(handle), + parameter, + data_type, + num_col)); + + if (fastConfig_ptr->config.num_threads > 0) { + 
omp_set_num_threads(fastConfig_ptr->config.num_threads); + } + + *out_fastConfig = fastConfig_ptr.release(); + API_END(); +} + +int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle, + const void* indptr, + int indptr_type, + const int32_t* indices, + const void* data, + int64_t nindptr, + int64_t nelem, + int predict_type, + int num_iteration, + int64_t* out_len, + double* out_result) { + API_BEGIN(); + FastConfig *fastConfig = reinterpret_cast(fastConfig_handle); + auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, fastConfig->data_type, nindptr, nelem); + fastConfig->booster->PredictSingleRow(num_iteration, predict_type, static_cast(fastConfig->ncol.int64), + get_row_fun, fastConfig->config, out_result, out_len); + API_END(); +} + int LGBM_BoosterPredictForCSC(BoosterHandle handle, const void* col_ptr, @@ -1648,51 +1743,6 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle, API_END(); } -/*! - * \brief Object to store resources meant for single-row Fast Predict methods. - * - * Meant to be used as a basic struct by the *Fast* predict methods only. - * It stores the configuration resources for reuse during prediction. - * - * Even the row function is stored. We score the instance at the same memory - * address all the time. One just replaces the feature values at that address - * and scores again with the *Fast* methods. 
- */ -struct FastConfig { - public: - FastConfig(Booster *const booster_ptr, - const char *parameter, - const int data_type, - const int32_t num_cols) : _booster(booster_ptr), _data_type(data_type), _ncol(num_cols) { - _config.Set(Config::Str2Map(parameter)); - } - - friend int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle, - const int data_type, - const int32_t ncol, - const char* parameter, - FastConfigHandle *out_fastConfig); - - friend int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fast_config_handle, - const void* data, - const int predict_type, - const int num_iteration, - int64_t* out_len, - double* out_result); - - private: - Booster* const _booster; - Config _config; - const int _data_type; - const int32_t _ncol; -}; - -int LGBM_FastConfigFree(FastConfigHandle fastConfig) { - API_BEGIN(); - delete reinterpret_cast(fastConfig); - API_END(); -} - int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle, const int data_type, const int32_t ncol, @@ -1705,8 +1755,8 @@ int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle, data_type, ncol)); - if (fastConfig_ptr->_config.num_threads > 0) { - omp_set_num_threads(fastConfig_ptr->_config.num_threads); + if (fastConfig_ptr->config.num_threads > 0) { + omp_set_num_threads(fastConfig_ptr->config.num_threads); } *out_fastConfig = fastConfig_ptr.release(); @@ -1721,10 +1771,11 @@ int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle, double* out_result) { API_BEGIN(); FastConfig *fastConfig = reinterpret_cast(fastConfig_handle); - auto get_row_fun = RowPairFunctionFromDenseMatric(data, 1, fastConfig->_ncol, fastConfig->_data_type, 1); // Single row in row-major format. 
- fastConfig->_booster->PredictSingleRow(num_iteration, predict_type, - fastConfig->_ncol, get_row_fun, fastConfig->_config, - out_result, out_len); + // Single row in row-major format: + auto get_row_fun = RowPairFunctionFromDenseMatric(data, 1, fastConfig->ncol.int32, fastConfig->data_type, 1); + fastConfig->booster->PredictSingleRow(num_iteration, predict_type, fastConfig->ncol.int32, + get_row_fun, fastConfig->config, + out_result, out_len); API_END(); } diff --git a/swig/lightgbmlib.i b/swig/lightgbmlib.i index 985dfb481f2a..3a4b6f626e3d 100644 --- a/swig/lightgbmlib.i +++ b/swig/lightgbmlib.i @@ -95,6 +95,32 @@ return ret; } + /*! \brief Even faster variant of `LGBM_BoosterPredictForMatSingle`. + * + * Uses `LGBM_BoosterPredictForMatSingleRowFast` which is faster + * than `LGBM_BoosterPredictForMatSingleRow` and the trick of + * `LGBM_BoosterPredictForMatSingle` to capture the Java data array + * using `GetPrimitiveArrayCritical`, which can yield faster access + * to the array if the JVM passes the actual address to the C++ side + * instead of performing a copy. + */ + int LGBM_BoosterPredictForMatSingleRowFastCriticalSWIG(JNIEnv *jenv, + jdoubleArray data, + FastConfigHandle handle, + int predict_type, + int num_iteration, + int64_t* out_len, + double* out_result) { + double* data0 = (double*)jenv->GetPrimitiveArrayCritical(data, 0); + + int ret = LGBM_BoosterPredictForMatSingleRowFast(handle, data0, predict_type, + num_iteration, out_len, out_result); + + jenv->ReleasePrimitiveArrayCritical(data, data0, JNI_ABORT); + + return ret; + } + int LGBM_BoosterPredictForCSRSingle(JNIEnv *jenv, jintArray indices, jdoubleArray values, @@ -130,6 +156,50 @@ return ret; } + /*! \brief Even faster variant of `LGBM_BoosterPredictForCSRSingle`. 
+ * + * Uses `LGBM_BoosterPredictForCSRSingleRowFast` which is faster + * than `LGBM_BoosterPredictForCSRSingleRow` and the trick of + * `LGBM_BoosterPredictForCSRSingle` to capture the Java data array + * using `GetPrimitiveArrayCritical`, which can yield faster access + * to the array if the JVM passes the actual address to the C++ side + * instead of performing a copy. + */ + int LGBM_BoosterPredictForCSRSingleRowFastCriticalSWIG(JNIEnv *jenv, + jintArray indices, + jdoubleArray values, + int numNonZeros, + FastConfigHandle handle, + int indptr_type, + //int data_type, + int64_t nelem, + //int64_t num_col, + int predict_type, + int num_iteration, + //const char* parameter, + int64_t* out_len, + double* out_result) { + // Alternatives + // - GetIntArrayElements: performs copy + // - GetDirectBufferAddress: fails on wrapped array + // Some words of warning for GetPrimitiveArrayCritical + // https://stackoverflow.com/questions/23258357/whats-the-trade-off-between-using-getprimitivearraycritical-and-getprimitivety + + jboolean isCopy; + int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, &isCopy); + double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, &isCopy); + + int32_t ind[2] = { 0, numNonZeros }; + + int ret = LGBM_BoosterPredictForCSRSingleRowFast(handle, ind, indptr_type, indices0, values0, 2, + nelem, predict_type, num_iteration, out_len, out_result); + + jenv->ReleasePrimitiveArrayCritical(values, values0, JNI_ABORT); + jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT); + + return ret; + } + #include #include