diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index ac2164a93775..b96dfb4fc01c 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -22,6 +22,7 @@
 
 typedef void* DatasetHandle; /*!< \brief Handle of dataset. */
 typedef void* BoosterHandle; /*!< \brief Handle of booster. */
+typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */
 
 #define C_API_DTYPE_FLOAT32 (0) /*!< \brief float32 (single precision float). */
 #define C_API_DTYPE_FLOAT64 (1) /*!< \brief float64 (double precision float). */
@@ -577,7 +578,7 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetEvalCounts(BoosterHandle handle,
  * \param len Number of ``char*`` pointers stored at ``out_strs``.
  *            If smaller than the max size, only this many strings are copied
  * \param[out] out_len Total number of evaluation datasets
- * \param buffer_len Size of pre-allocated strings.
+ * \param buffer_len Size of pre-allocated strings.
  *                   Content is copied up to ``buffer_len - 1`` and null-terminated
  * \param[out] out_buffer_len String sizes required to do the full string copies
  * \param[out] out_strs Names of evaluation datasets, should pre-allocate memory
@@ -703,6 +704,14 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
                                                  int num_iteration,
                                                  int64_t* out_len);
 
+/*!
+ * \brief Release FastConfig object.
+ *
+ * \param fastConfig Handle to the FastConfig object acquired with a ``*FastInit()`` method.
+ * \return 0 when it succeeds, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_FastConfigFree(FastConfigHandle fastConfig);
+
 /*!
  * \brief Make prediction for a new dataset in CSR format.
  * \note
@@ -844,6 +853,73 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle,
                                                          int64_t* out_len,
                                                          double* out_result);
 
+/*!
+ * \brief Initialize and return a ``FastConfigHandle`` for use with ``LGBM_BoosterPredictForCSRSingleRowFast``.
+ *
+ * Release the ``FastConfig`` by passing its handle to ``LGBM_FastConfigFree`` when no longer needed.
+ *
+ * \param handle Booster handle
+ * \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64``
+ * \param num_col Number of columns
+ * \param parameter Other parameters for prediction, e.g. early stopping for prediction
+ * \param[out] out_fastConfig FastConfig object with which you can call ``LGBM_BoosterPredictForCSRSingleRowFast``
+ * \return 0 when it succeeds, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle,
+                                                                 const int data_type,
+                                                                 const int64_t num_col,
+                                                                 const char* parameter,
+                                                                 FastConfigHandle *out_fastConfig);
+
+/*!
+ * \brief Faster variant of ``LGBM_BoosterPredictForCSRSingleRow``.
+ *
+ * Score single rows after setup with ``LGBM_BoosterPredictForCSRSingleRowFastInit``.
+ *
+ * By removing the setup steps from this call extra optimizations can be made like
+ * initializing the config only once, instead of once per call.
+ *
+ * \note
+ * Setting up the number of threads is only done once at ``LGBM_BoosterPredictForCSRSingleRowFastInit``
+ * instead of at each prediction.
+ * If you use a different number of threads in other calls, you need to start the setup process over,
+ * or that number of threads will be used for these calls as well.
+ *
+ * \note
+ * You should pre-allocate memory for ``out_result``:
+ *   - for normal and raw score, its length is equal to ``num_class * num_data``;
+ *   - for leaf index, its length is equal to ``num_class * num_data * num_iteration``;
+ *   - for feature contributions, its length is equal to ``num_class * num_data * (num_feature + 1)``.
+ *
+ * \param fastConfig_handle FastConfig object handle returned by ``LGBM_BoosterPredictForCSRSingleRowFastInit``
+ * \param indptr Pointer to row headers
+ * \param indptr_type Type of ``indptr``, can be ``C_API_DTYPE_INT32`` or ``C_API_DTYPE_INT64``
+ * \param indices Pointer to column indices
+ * \param data Pointer to the data space
+ * \param nindptr Number of rows in the matrix + 1
+ * \param nelem Number of nonzero elements in the matrix
+ * \param predict_type What should be predicted
+ *   - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed);
+ *   - ``C_API_PREDICT_RAW_SCORE``: raw score;
+ *   - ``C_API_PREDICT_LEAF_INDEX``: leaf index;
+ *   - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values)
+ * \param num_iteration Number of iterations for prediction, <= 0 means no limit
+ * \param[out] out_len Length of output result
+ * \param[out] out_result Pointer to array with predictions
+ * \return 0 when it succeeds, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle,
+                                                             const void* indptr,
+                                                             int indptr_type,
+                                                             const int32_t* indices,
+                                                             const void* data,
+                                                             int64_t nindptr,
+                                                             int64_t nelem,
+                                                             int predict_type,
+                                                             int num_iteration,
+                                                             int64_t* out_len,
+                                                             double* out_result);
+
 /*!
  * \brief Make prediction for a new dataset in CSC format.
  * \note
@@ -957,6 +1033,57 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
                                                          int64_t* out_len,
                                                          double* out_result);
 
+/*!
+ * \brief Initialize and return a ``FastConfigHandle`` for use with ``LGBM_BoosterPredictForMatSingleRowFast``.
+ *
+ * Release the ``FastConfig`` by passing its handle to ``LGBM_FastConfigFree`` when no longer needed.
+ *
+ * \param handle Booster handle
+ * \param data_type Type of ``data`` pointer, can be ``C_API_DTYPE_FLOAT32`` or ``C_API_DTYPE_FLOAT64``
+ * \param ncol Number of columns
+ * \param parameter Other parameters for prediction, e.g. early stopping for prediction
+ * \param[out] out_fastConfig FastConfig object with which you can call ``LGBM_BoosterPredictForMatSingleRowFast``
+ * \return 0 when it succeeds, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle,
+                                                                 int data_type,
+                                                                 int32_t ncol,
+                                                                 const char* parameter,
+                                                                 FastConfigHandle *out_fastConfig);
+
+/*!
+ * \brief Faster variant of ``LGBM_BoosterPredictForMatSingleRow``.
+ *
+ * Score a single row after setup with ``LGBM_BoosterPredictForMatSingleRowFastInit``.
+ *
+ * By removing the setup steps from this call extra optimizations can be made like
+ * initializing the config only once, instead of once per call.
+ *
+ * \note
+ * Setting up the number of threads is only done once at ``LGBM_BoosterPredictForMatSingleRowFastInit``
+ * instead of at each prediction.
+ * If you use a different number of threads in other calls, you need to start the setup process over,
+ * or that number of threads will be used for these calls as well.
+ *
+ * \param fastConfig_handle FastConfig object handle returned by ``LGBM_BoosterPredictForMatSingleRowFastInit``
+ * \param data Single-row array data (no other way than row-major form).
+ * \param predict_type What should be predicted
+ *   - ``C_API_PREDICT_NORMAL``: normal prediction, with transform (if needed);
+ *   - ``C_API_PREDICT_RAW_SCORE``: raw score;
+ *   - ``C_API_PREDICT_LEAF_INDEX``: leaf index;
+ *   - ``C_API_PREDICT_CONTRIB``: feature contributions (SHAP values)
+ * \param num_iteration Number of iterations for prediction, <= 0 means no limit
+ * \param[out] out_len Length of output result
+ * \param[out] out_result Pointer to array with predictions
+ * \return 0 when it succeeds, -1 when failure happens
+ */
+LIGHTGBM_C_EXPORT int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle,
+                                                             const void* data,
+                                                             int predict_type,
+                                                             int num_iteration,
+                                                             int64_t* out_len,
+                                                             double* out_result);
+
 /*!
  * \brief Make prediction for a new dataset presented in a form of array of pointers to rows.
  * \note
diff --git a/src/c_api.cpp b/src/c_api.cpp
index ec77673b28a8..8ce7fd3e175a 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -1735,6 +1735,36 @@ int LGBM_BoosterCalcNumPredict(BoosterHandle handle,
   API_END();
 }
 
+/*!
+ * \brief Object to store resources meant for single-row Fast Predict methods.
+ *
+ * Meant to be used as a basic struct by the *Fast* predict methods only.
+ * It stores the configuration resources for reuse during prediction.
+ *
+ * Only the booster pointer, the parsed config, the data type and the column
+ * count are kept here; the row accessor is rebuilt on every *Fast* call from
+ * the data pointers handed to that call.
+ */
+struct FastConfig {
+  FastConfig(Booster *const booster_ptr,
+             const char *parameter,
+             const int data_type_,
+             const int32_t num_cols) : booster(booster_ptr), data_type(data_type_), ncol(num_cols) {
+    config.Set(Config::Str2Map(parameter));
+  }
+
+  Booster* const booster;
+  Config config;
+  const int data_type;
+  const int32_t ncol;
+};
+
+int LGBM_FastConfigFree(FastConfigHandle fastConfig) {
+  API_BEGIN();
+  delete reinterpret_cast<FastConfig*>(fastConfig);
+  API_END();
+}
+
 int LGBM_BoosterPredictForCSR(BoosterHandle handle,
                               const void* indptr,
                               int indptr_type,
@@ -1886,6 +1916,51 @@ int LGBM_BoosterPredictForCSRSingleRow(BoosterHandle handle,
   API_END();
 }
 
+int LGBM_BoosterPredictForCSRSingleRowFastInit(BoosterHandle handle,
+                                               const int data_type,
+                                               const int64_t num_col,
+                                               const char* parameter,
+                                               FastConfigHandle *out_fastConfig) {
+  API_BEGIN();
+  if (num_col <= 0) {
+    Log::Fatal("The number of columns should be greater than zero.");
+  } else if (num_col >= INT32_MAX) {
+    Log::Fatal("The number of columns should be smaller than INT32_MAX.");
+  }
+
+  auto fastConfig_ptr = std::unique_ptr<FastConfig>(new FastConfig(
+    reinterpret_cast<Booster*>(handle),
+    parameter,
+    data_type,
+    static_cast<int32_t>(num_col)));
+
+  if (fastConfig_ptr->config.num_threads > 0) {
+    omp_set_num_threads(fastConfig_ptr->config.num_threads);
+  }
+
+  *out_fastConfig = fastConfig_ptr.release();
+  API_END();
+}
+
+int LGBM_BoosterPredictForCSRSingleRowFast(FastConfigHandle fastConfig_handle,
+                                           const void* indptr,
+                                           int indptr_type,
+                                           const int32_t* indices,
+                                           const void* data,
+                                           int64_t nindptr,
+                                           int64_t nelem,
+                                           int predict_type,
+                                           int num_iteration,
+                                           int64_t* out_len,
+                                           double* out_result) {
+  API_BEGIN();
+  FastConfig *fastConfig = reinterpret_cast<FastConfig*>(fastConfig_handle);
+  auto get_row_fun = RowFunctionFromCSR(indptr, indptr_type, indices, data, fastConfig->data_type, nindptr, nelem);
+  fastConfig->booster->PredictSingleRow(num_iteration, predict_type, fastConfig->ncol,
+                                        get_row_fun, fastConfig->config, out_result, out_len);
+  API_END();
+}
+
 
 int LGBM_BoosterPredictForCSC(BoosterHandle handle,
                               const void* col_ptr,
@@ -1983,6 +2058,42 @@ int LGBM_BoosterPredictForMatSingleRow(BoosterHandle handle,
   API_END();
 }
 
+int LGBM_BoosterPredictForMatSingleRowFastInit(BoosterHandle handle,
+                                               const int data_type,
+                                               const int32_t ncol,
+                                               const char* parameter,
+                                               FastConfigHandle *out_fastConfig) {
+  API_BEGIN();
+  auto fastConfig_ptr = std::unique_ptr<FastConfig>(new FastConfig(
+    reinterpret_cast<Booster*>(handle),
+    parameter,
+    data_type,
+    ncol));
+
+  if (fastConfig_ptr->config.num_threads > 0) {
+    omp_set_num_threads(fastConfig_ptr->config.num_threads);
+  }
+
+  *out_fastConfig = fastConfig_ptr.release();
+  API_END();
+}
+
+int LGBM_BoosterPredictForMatSingleRowFast(FastConfigHandle fastConfig_handle,
+                                           const void* data,
+                                           const int predict_type,
+                                           const int num_iteration,
+                                           int64_t* out_len,
+                                           double* out_result) {
+  API_BEGIN();
+  FastConfig *fastConfig = reinterpret_cast<FastConfig*>(fastConfig_handle);
+  // Single row in row-major format:
+  auto get_row_fun = RowPairFunctionFromDenseMatric(data, 1, fastConfig->ncol, fastConfig->data_type, 1);
+  fastConfig->booster->PredictSingleRow(num_iteration, predict_type, fastConfig->ncol,
+                                        get_row_fun, fastConfig->config,
+                                        out_result, out_len);
+  API_END();
+}
+
 
 int LGBM_BoosterPredictForMats(BoosterHandle handle,
                                const void** data,
diff --git a/swig/lightgbmlib.i b/swig/lightgbmlib.i
index 985dfb481f2a..3a4b6f626e3d 100644
--- a/swig/lightgbmlib.i
+++ b/swig/lightgbmlib.i
@@ -95,6 +95,32 @@
     return ret;
   }
 
+  /*! \brief Even faster variant of `LGBM_BoosterPredictForMatSingle`.
+   *
+   * Uses `LGBM_BoosterPredictForMatSingleRowFast` which is faster
+   * than `LGBM_BoosterPredictForMatSingleRow` and the trick of
+   * `LGBM_BoosterPredictForMatSingle` to capture the Java data array
+   * using `GetPrimitiveArrayCritical`, which can yield faster access
+   * to the array if the JVM passes the actual address to the C++ side
+   * instead of performing a copy.
+   */
+  int LGBM_BoosterPredictForMatSingleRowFastCriticalSWIG(JNIEnv *jenv,
+                                                         jdoubleArray data,
+                                                         FastConfigHandle handle,
+                                                         int predict_type,
+                                                         int num_iteration,
+                                                         int64_t* out_len,
+                                                         double* out_result) {
+    double* data0 = (double*)jenv->GetPrimitiveArrayCritical(data, 0);
+
+    int ret = LGBM_BoosterPredictForMatSingleRowFast(handle, data0, predict_type,
+                                                     num_iteration, out_len, out_result);
+
+    jenv->ReleasePrimitiveArrayCritical(data, data0, JNI_ABORT);
+
+    return ret;
+  }
+
   int LGBM_BoosterPredictForCSRSingle(JNIEnv *jenv,
                                       jintArray indices,
                                       jdoubleArray values,
@@ -130,6 +156,50 @@
     return ret;
   }
 
+  /*! \brief Even faster variant of `LGBM_BoosterPredictForCSRSingle`.
+   *
+   * Uses `LGBM_BoosterPredictForCSRSingleRowFast` which is faster
+   * than `LGBM_BoosterPredictForCSRSingleRow` and the trick of
+   * `LGBM_BoosterPredictForCSRSingle` to capture the Java data array
+   * using `GetPrimitiveArrayCritical`, which can yield faster access
+   * to the array if the JVM passes the actual address to the C++ side
+   * instead of performing a copy.
+   */
+  int LGBM_BoosterPredictForCSRSingleRowFastCriticalSWIG(JNIEnv *jenv,
+                                                         jintArray indices,
+                                                         jdoubleArray values,
+                                                         int numNonZeros,
+                                                         FastConfigHandle handle,
+                                                         int indptr_type,
+                                                         //int data_type,
+                                                         int64_t nelem,
+                                                         //int64_t num_col,
+                                                         int predict_type,
+                                                         int num_iteration,
+                                                         //const char* parameter,
+                                                         int64_t* out_len,
+                                                         double* out_result) {
+    // Alternatives
+    // - GetIntArrayElements: performs copy
+    // - GetDirectBufferAddress: fails on wrapped array
+    // Some words of warning for GetPrimitiveArrayCritical
+    // https://stackoverflow.com/questions/23258357/whats-the-trade-off-between-using-getprimitivearraycritical-and-getprimitivety
+
+    jboolean isCopy;
+    int* indices0 = (int*)jenv->GetPrimitiveArrayCritical(indices, &isCopy);
+    double* values0 = (double*)jenv->GetPrimitiveArrayCritical(values, &isCopy);
+
+    int32_t ind[2] = { 0, numNonZeros };
+
+    int ret = LGBM_BoosterPredictForCSRSingleRowFast(handle, ind, indptr_type, indices0, values0, 2,
+                                                     nelem, predict_type, num_iteration, out_len, out_result);
+
+    jenv->ReleasePrimitiveArrayCritical(values, values0, JNI_ABORT);
+    jenv->ReleasePrimitiveArrayCritical(indices, indices0, JNI_ABORT);
+
+    return ret;
+  }
+
   #include
   #include
 