Skip to content

Commit

Permalink
Fix tests (#3)
Browse files Browse the repository at this point in the history
* update atof

* fix bug

* fix tests.

* fix bug

* fix dtypes

* fix categorical feature override

* fix protobuf on vs build (microsoft#1004)

* [optional] support protobuf

* fix windows/LightGBM.vcxproj

* add doc

* fix doc

* fix vs support (#2)

* fix vs support

* fix cmake

* fix microsoft#1012

* [python] add network config api  (microsoft#1019)

* add network

* update doc

* add float tolerance in bin finder.

* fix a bug

* update tests

* add double torelance on tree model

* fix tests

* simplify the double comparison

* fix lightsvm zero base

* move double tolerance to the bin finder.

* fix pylint
  • Loading branch information
guolinke authored and wxchan committed Oct 26, 2017
1 parent 4be82cd commit 8014042
Show file tree
Hide file tree
Showing 24 changed files with 502 additions and 93 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ env:
- TASK=if-else
- TASK=sdist PYTHON_VERSION=3.4
- TASK=bdist PYTHON_VERSION=3.5
- TASK=proto
- TASK=gpu METHOD=source
- TASK=gpu METHOD=pip

Expand All @@ -38,6 +39,8 @@ matrix:
env: TASK=pylint
- os: osx
env: TASK=check-docs
- os: osx
env: TASK=proto

before_install:
- test -n $CC && unset CC
Expand Down
14 changes: 13 additions & 1 deletion .travis/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,24 @@ if [[ ${TASK} == "if-else" ]]; then
conda create -q -n test-env python=$PYTHON_VERSION numpy
source activate test-env
mkdir build && cd build && cmake .. && make lightgbm || exit -1
cd $TRAVIS_BUILD_DIR/tests/cpp_test && ../../lightgbm config=train.conf && ../../lightgbm config=predict.conf output_result=origin.pred || exit -1
cd $TRAVIS_BUILD_DIR/tests/cpp_test && ../../lightgbm config=train.conf convert_model_language=cpp convert_model=../../src/boosting/gbdt_prediction.cpp && ../../lightgbm config=predict.conf output_result=origin.pred || exit -1
cd $TRAVIS_BUILD_DIR/build && make lightgbm || exit -1
cd $TRAVIS_BUILD_DIR/tests/cpp_test && ../../lightgbm config=predict.conf output_result=ifelse.pred && python test.py || exit -1
exit 0
fi

# "proto" CI task: builds lightgbm twice (without and with protobuf support),
# trains/predicts with each build, then runs test.py in tests/cpp_test.
if [[ ${TASK} == "proto" ]]; then
conda create -q -n test-env python=$PYTHON_VERSION numpy
source activate test-env
# Step 1: default build (no protobuf); train and write predictions to origin.pred.
mkdir build && cd build && cmake .. && make lightgbm || exit -1
cd $TRAVIS_BUILD_DIR/tests/cpp_test && ../../lightgbm config=train.conf && ../../lightgbm config=predict.conf output_result=origin.pred || exit -1
# Step 2: build and install protobuf from source.
# NOTE(review): unlike the other steps, this chain has no `|| exit -1`, so a failure
# here (clone, build, install or ldconfig) does not stop the script at this line — confirm this is intended.
cd $TRAVIS_BUILD_DIR && git clone https://github.com/google/protobuf && cd protobuf && ./autogen.sh && ./configure && make && sudo make install && sudo ldconfig
# Step 3: wipe the build directory and rebuild with protobuf support enabled.
cd $TRAVIS_BUILD_DIR/build && rm -rf * && cmake -DUSE_PROTO=ON .. && make lightgbm || exit -1
# Step 4: train and predict using the protobuf model format; predictions go to proto.pred.
cd $TRAVIS_BUILD_DIR/tests/cpp_test && ../../lightgbm config=train.conf model_format=proto && ../../lightgbm config=predict.conf output_result=proto.pred model_format=proto || exit -1
# Step 5: run the check script (presumably compares the prediction files — verify against test.py).
cd $TRAVIS_BUILD_DIR/tests/cpp_test && python test.py || exit -1
exit 0
fi

conda create -q -n test-env python=$PYTHON_VERSION numpy nose scipy scikit-learn pandas matplotlib pytest
source activate test-env

Expand Down
21 changes: 19 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,25 @@ file(GLOB SOURCES
src/treelearner/*.cpp
)

add_executable(lightgbm src/main.cpp ${SOURCES})
add_library(_lightgbm SHARED src/c_api.cpp src/lightgbm_R.cpp ${SOURCES})
# Optional protobuf model support, enabled by configuring with -DUSE_PROTO=ON.
if (USE_PROTO)
# Configuration is aborted on MSVC: the protobuf build is not supported there.
if(MSVC)
message(FATAL_ERROR "Cannot use proto with MSVC.")
endif(MSVC)
find_package(Protobuf REQUIRED)
# Generate C++ code from proto/model.proto; the generated file lists are
# returned in PROTO_SRCS and PROTO_HDRS.
PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS proto/model.proto)
include_directories(${PROTOBUF_INCLUDE_DIRS})
# Add the build tree to the include path so the generated header can be found.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Define the USE_PROTO preprocessor macro for the C++ sources.
ADD_DEFINITIONS(-DUSE_PROTO)
# Extra source files; PROTO_FILES stays empty when USE_PROTO is off.
SET(PROTO_FILES src/proto/gbdt_model_proto.cpp ${PROTO_HDRS} ${PROTO_SRCS})
endif(USE_PROTO)

add_executable(lightgbm src/main.cpp ${SOURCES} ${PROTO_FILES})
add_library(_lightgbm SHARED src/c_api.cpp src/lightgbm_R.cpp ${SOURCES} ${PROTO_FILES})

# Link the protobuf libraries into both the executable and the shared library
# when protobuf support is enabled.
if (USE_PROTO)
TARGET_LINK_LIBRARIES(lightgbm ${PROTOBUF_LIBRARIES})
TARGET_LINK_LIBRARIES(_lightgbm ${PROTOBUF_LIBRARIES})
endif(USE_PROTO)

if(MSVC)
set_target_properties(_lightgbm PROPERTIES OUTPUT_NAME "lib_lightgbm")
Expand Down
15 changes: 15 additions & 0 deletions docs/Installation-Guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,21 @@ Following procedure is for the MSVC (Microsoft Visual C++) build.
**Note**: ``C:\local\boost_1_64_0\`` and ``C:\local\boost_1_64_0\lib64-msvc-14.0`` are locations of your Boost binaries. You also can set them to the environment variable to avoid ``Set ...`` commands when build.

Protobuf Support
^^^^^^^^^^^^^^^^

If you want to use protobuf to save and load models, install `protobuf c++ version <https://github.com/google/protobuf/blob/master/src/README.md>`__ first.

Then run cmake with USE_PROTO on, for example:

.. code::

    cmake -DUSE_PROTO=ON ..

You can then use ``model_format=proto`` in parameters when saving and loading models.

**Note**: for Windows users, this has only been tested with MinGW.

Docker
^^^^^^

Expand Down
14 changes: 14 additions & 0 deletions docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,20 @@ IO Parameters

- file name of prediction result in ``prediction`` task

- ``model_format``, default=\ ``text``, type=string

- format to save and load model.

- ``text``, use text string.

- ``proto``, use protocol buffer binary format.

- save multiple formats by joining them with a comma, like ``text,proto``; in this case, ``model_format`` will be added as a suffix after ``output_model``.

- loading with multiple formats is not supported.

- Note: you need to run cmake with ``-DUSE_PROTO=ON`` to use this parameter.

- ``is_pre_partition``, default=\ ``false``, type=bool

- used for parallel learning (not include feature parallel)
Expand Down
36 changes: 25 additions & 11 deletions include/LightGBM/boosting.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
#include <LightGBM/meta.h>
#include <LightGBM/config.h>

#ifdef USE_PROTO
#include "model.pb.h"
#endif // USE_PROTO

#include <vector>
#include <string>

Expand Down Expand Up @@ -166,7 +170,7 @@ class LIGHTGBM_EXPORT Boosting {

/*!
* \brief Save model to file
* \param num_used_model Number of model that want to save, -1 means save all
* \param num_iterations Number of model that want to save, -1 means save all
* \param is_finish Is training finished or not
* \param filename Filename that want to save to
* \return true if succeeded
Expand All @@ -175,7 +179,7 @@ class LIGHTGBM_EXPORT Boosting {

/*!
* \brief Save model to string
* \param num_used_model Number of model that want to save, -1 means save all
* \param num_iterations Number of model that want to save, -1 means save all
* \return Non-empty string if succeeded
*/
virtual std::string SaveModelToString(int num_iterations) const = 0;
Expand All @@ -187,6 +191,22 @@ class LIGHTGBM_EXPORT Boosting {
*/
virtual bool LoadModelFromString(const std::string& model_str) = 0;

#ifdef USE_PROTO
/*!
* \brief Save model with protobuf (only declared when USE_PROTO is defined)
* \param num_iteration Number of models to save, -1 means save all
* \param filename Name of the file to save to
*/
virtual void SaveModelToProto(int num_iteration, const char* filename) const = 0;

/*!
* \brief Restore from a serialized protobuf file (only declared when USE_PROTO is defined)
* \param filename Name of the file to restore from
* \return true if succeeded
*/
virtual bool LoadModelFromProto(const char* filename) = 0;
#endif // USE_PROTO

/*!
* \brief Calculate feature importances
* \param num_iteration Number of model that want to use for feature importance, -1 means use all
Expand Down Expand Up @@ -251,23 +271,17 @@ class LIGHTGBM_EXPORT Boosting {
/*! \brief Disable copy */
Boosting(const Boosting&) = delete;

static bool LoadFileToBoosting(Boosting* boosting, const char* filename);
static bool LoadFileToBoosting(Boosting* boosting, const std::string& format, const char* filename);

/*!
* \brief Create boosting object
* \param type Type of boosting
* \param format Format of model
* \param config config for boosting
* \param filename name of model file, if existing will continue to train from this model
* \return The boosting object
*/
static Boosting* CreateBoosting(const std::string& type, const char* filename);

/*!
* \brief Create boosting object from model file
* \param filename name of model file
* \return The boosting object
*/
static Boosting* CreateBoosting(const char* filename);
static Boosting* CreateBoosting(const std::string& type, const std::string& format, const char* filename);

};

Expand Down
3 changes: 2 additions & 1 deletion include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ struct IOConfig: public ConfigBase {
std::string output_result = "LightGBM_predict_result.txt";
std::string convert_model = "gbdt_prediction.cpp";
std::string input_model = "";
std::string model_format = "text";
int verbosity = 1;
int num_iteration_predict = -1;
bool is_pre_partition = false;
Expand Down Expand Up @@ -445,7 +446,7 @@ struct ParameterAlias {
const std::unordered_set<std::string> parameter_set({
"config", "config_file", "task", "device",
"num_threads", "seed", "boosting_type", "objective", "data",
"output_model", "input_model", "output_result", "valid_data",
"output_model", "input_model", "output_result", "model_format", "valid_data",
"is_enable_sparse", "is_pre_partition", "is_training_metric",
"ndcg_eval_at", "min_data_in_leaf", "min_sum_hessian_in_leaf",
"num_leaves", "feature_fraction", "num_iterations",
Expand Down
15 changes: 15 additions & 0 deletions include/LightGBM/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#include <LightGBM/meta.h>
#include <LightGBM/dataset.h>
#ifdef USE_PROTO
#include "model.pb.h"
#endif // USE_PROTO

#include <string>
#include <vector>
Expand Down Expand Up @@ -30,6 +33,13 @@ class Tree {
* \param str Model string
*/
explicit Tree(const std::string& str);
#ifdef USE_PROTO
/*!
* \brief Constructor, from a protobuf object (only declared when USE_PROTO is defined)
* \param model_tree Model protobuf object
*/
explicit Tree(const Model_Tree& model_tree);
#endif // USE_PROTO

~Tree();

Expand Down Expand Up @@ -165,6 +175,11 @@ class Tree {
/*! \brief Serialize this object to if-else statement*/
std::string ToIfElse(int index, bool is_predict_leaf_index) const;

#ifdef USE_PROTO
/*!
* \brief Serialize this object to a protobuf object (only declared when USE_PROTO is defined)
* \param model_tree Protobuf tree object passed by non-const reference; presumably filled in by this call
*/
void ToProto(Model_Tree& model_tree) const;
#endif // USE_PROTO

inline static bool IsZero(double fval) {
if (fval > -kZeroAsMissingValueRange && fval <= kZeroAsMissingValueRange) {
return true;
Expand Down
26 changes: 16 additions & 10 deletions include/LightGBM/utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ inline static const char* Atoi(const char* p, int* out) {
}

template<class T>
inline static T Pow(T base, int power) {
if (power == 0) {
inline static double Pow(T base, int power) {
if (power < 0) {
return 1.0 / Pow(base, -power);
} else if (power == 0) {
return 1;
} else if (power == 1) {
return base;
} else if (power % 2 == 0) {
return Pow(base*base, power / 2);
} else if (power % 3 == 0) {
Expand All @@ -183,7 +183,6 @@ inline static const char* Atof(const char* p, double* out) {
while (*p == ' ') {
++p;
}

// Get sign, if any.
sign = 1.0;
if (*p == '-') {
Expand Down Expand Up @@ -241,9 +240,9 @@ inline static const char* Atof(const char* p, double* out) {
} else {
size_t cnt = 0;
while (*(p + cnt) != '\0' && *(p + cnt) != ' '
&& *(p + cnt) != '\t' && *(p + cnt) != ','
&& *(p + cnt) != '\n' && *(p + cnt) != '\r'
&& *(p + cnt) != ':') {
&& *(p + cnt) != '\t' && *(p + cnt) != ','
&& *(p + cnt) != '\n' && *(p + cnt) != '\r'
&& *(p + cnt) != ':') {
++cnt;
}
if (cnt > 0) {
Expand All @@ -267,8 +266,6 @@ inline static const char* Atof(const char* p, double* out) {
return p;
}



inline bool AtoiAndCheck(const char* p, int* out) {
const char* after = Atoi(p, out);
if (*after != '\0') {
Expand Down Expand Up @@ -649,6 +646,15 @@ inline bool FindInBitset(const uint32_t* bits, int n, T pos) {
return (bits[i1] >> i2) & 1;
}

/*!
* \brief Check that b does not exceed a by more than one representable step
* \param a Reference value
* \param b Value compared against a (the name suggests callers pass b >= a; not enforced here)
* \return true if b is less than or equal to the next double above a
*/
inline static bool CheckDoubleEqualOrdered(double a, double b) {
  // Compare directly against the next representable double above a.
  return b <= std::nextafter(a, INFINITY);
}

/*!
* \brief Get the next representable double above a
* \param a Input value
* \return std::nextafter(a, INFINITY); for finite a this is the smallest double strictly greater than a
*/
inline static double GetDoubleUpperBound(double a) {
  return std::nextafter(a, INFINITY);
}

} // namespace Common

} // namespace LightGBM
Expand Down
33 changes: 33 additions & 0 deletions proto/model.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Protocol buffer schema for a serialized LightGBM model.
// Field numbers are part of the binary format: do not renumber or reuse them.
syntax = "proto3";

package LightGBM;

// Top-level model message: model-wide settings plus the list of trees.
message Model {
string name = 1;
uint32 num_class = 2;
uint32 num_tree_per_iteration = 3;
uint32 label_index = 4;
uint32 max_feature_idx = 5;
string objective = 6;
bool average_output = 7;
repeated string feature_names = 8;
repeated string feature_infos = 9;
// A single tree. The repeated fields presumably hold one entry per internal
// node or per leaf, as their names suggest — confirm against Tree::ToProto.
message Tree {
uint32 num_leaves = 1;
uint32 num_cat = 2;
repeated uint32 split_feature = 3;
repeated double split_gain = 4;
repeated double threshold = 5;
repeated uint32 decision_type = 6;
repeated sint32 left_child = 7;
repeated sint32 right_child = 8;
repeated double leaf_value = 9;
repeated uint32 leaf_count = 10;
repeated double internal_value = 11;
// NOTE(review): internal_count is declared double while leaf_count is uint32 — confirm this is intended.
repeated double internal_count = 12;
repeated sint32 cat_boundaries = 13;
repeated uint32 cat_threshold = 14;
double shrinkage = 15;
}
// Note: field number 10 here belongs to Model and is independent of Tree's field 10.
repeated Tree trees = 10;
}
Loading

0 comments on commit 8014042

Please sign in to comment.