diff --git a/include/LightGBM/application.h b/include/LightGBM/application.h
index 827230e46f0b..c49fb23af7b2 100644
--- a/include/LightGBM/application.h
+++ b/include/LightGBM/application.h
@@ -18,7 +18,7 @@ class Metric;
 * \brief The entrance of LightGBM. this application has two tasks:
 * Train and Predict.
 * Train task will train a new model
-* Predict task will predicting the scores of test data then saving the score to local disk
+* Predict task will predict the scores of test data and save them to local disk
 */
 class Application {
  public:
diff --git a/include/LightGBM/bin.h b/include/LightGBM/bin.h
index e88a8655bc9e..dad87cf51f55 100644
--- a/include/LightGBM/bin.h
+++ b/include/LightGBM/bin.h
@@ -119,10 +119,10 @@ class BinMapper {
 };

 /*!
-* \brief Interface for ordered bin data. efficient for construct histogram, especally for sparse bin
+* \brief Interface for ordered bin data. It is very efficient for constructing histograms, especially for sparse bins.
 * There are 2 advantages for using ordered bin.
 * 1. group the data by leaf, improve the cache hit.
-* 2. only store the non-zero bin, which can speed up the histogram cconsturction for sparse feature.
-* But it has a additional cost, it need re-order the bins after leaf split, which will cost much for dense feature.
+* 2. only store the non-zero bins, which can speed up the histogram construction for sparse features.
+* But it has an additional cost: it needs to re-order the bins after a leaf split, which is expensive for dense features.
 * So we only use ordered bin for sparse features now.
 */
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 05a2a6ed5b3f..71ee3bf56594 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -152,7 +152,7 @@ void GBDT::Bagging(int iter) {
 }

 void GBDT::UpdateScoreOutOfBag(const Tree* tree) {
-  // we need to predict out-of-bag data's socres for boosing
+  // we need to predict the out-of-bag scores of the data for boosting
   if (out_of_bag_data_indices_ != nullptr) {
     train_score_updater_->
       AddScore(tree, out_of_bag_data_indices_, out_of_bag_data_cnt_);
@@ -169,12 +169,12 @@ void GBDT::Train() {
     Bagging(iter);
     // train a new tree
     Tree * new_tree = TrainOneTree();
-    // if cannon learn a new tree, stop
+    // if we cannot learn a new tree, then stop
     if (new_tree->num_leaves() <= 1) {
       Log::Stdout("Cannot do any boosting for tree cannot split");
       break;
     }
-    // Shrinkage by learning rate
+    // shrinkage by learning rate
     new_tree->Shrinkage(gbdt_config_->learning_rate);
     // update score
     UpdateScore(new_tree);
@@ -183,12 +183,12 @@ void GBDT::Train() {
     OutputMetric(iter + 1);
     // add model
     models_.push_back(new_tree);
-    // write model to file on every iteration
+    // save the model to file on every iteration
     fprintf(output_model_file, "Tree=%d\n", iter);
     fprintf(output_model_file, "%s\n", new_tree->ToString().c_str());
     fflush(output_model_file);
     auto end_time = std::chrono::high_resolution_clock::now();
-    // output used time on each iteration
+    // output the time used by each iteration
     Log::Stdout("%f seconds elapsed, finished %d iteration",
       std::chrono::duration<double, std::milli>(end_time - start_time) * 1e-3, iter + 1);
   }
@@ -223,7 +223,7 @@ void GBDT::OutputMetric(int iter) {
 }

 void GBDT::Boosting() {
-  // objective function will calculation gradients and hessians
+  // the objective function will calculate gradients and hessians
   object_function_->
     GetGradients(train_score_updater_->score(), gradients_, hessians_);
 }
@@ -248,6 +248,7 @@ std::string GBDT::ModelsToString() const {

 void GBDT::ModelsFromString(const std::string& model_str, int num_used_model) {
   // use serialized string to restore this object
+  // i.e. deserialize the model string back into tree objects
   models_.clear();
   std::vector<std::string> lines = Common::Split(model_str.c_str(), '\n');
   size_t i = 0;
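
Side note on the ordered-bin comment in bin.h above: the sketch below illustrates the idea it describes, not LightGBM's actual implementation. All names here (SparseBinEntry, HistEntry, ConstructHistogram) are hypothetical. The point is that only the non-zero (data index, bin) pairs of a sparse feature are stored, grouped contiguously by leaf, so building a leaf's histogram touches a handful of entries instead of scanning every data point; the cost the comment mentions is that these entries must be re-grouped after each leaf split.

#include <cstdio>
#include <vector>

// One stored entry of a sparse feature: only non-zero bins are kept.
struct SparseBinEntry {
  int data_idx;  // index of the data point
  int bin;       // non-zero bin value of the sparse feature
};

// Accumulated gradient statistics for one histogram bin.
struct HistEntry {
  double sum_gradients = 0.0;
  double sum_hessians = 0.0;
};

// Construct the histogram for one leaf. Entries are assumed to be re-ordered
// so that each leaf's entries are contiguous: [start, start + count).
void ConstructHistogram(const std::vector<SparseBinEntry>& ordered_entries,
                        int start, int count,
                        const std::vector<double>& gradients,
                        const std::vector<double>& hessians,
                        std::vector<HistEntry>* hist) {
  for (int i = start; i < start + count; ++i) {
    const SparseBinEntry& e = ordered_entries[i];
    (*hist)[e.bin].sum_gradients += gradients[e.data_idx];
    (*hist)[e.bin].sum_hessians += hessians[e.data_idx];
  }
}

int main() {
  // 8 data points, but the sparse feature is non-zero for only 3 of them.
  std::vector<double> gradients = {0.1, -0.2, 0.3, 0.0, -0.1, 0.2, 0.05, -0.3};
  std::vector<double> hessians(8, 1.0);
  // Non-zero entries, already grouped by leaf: leaf 0 owns entries [0, 2).
  std::vector<SparseBinEntry> ordered_entries = {{1, 2}, {4, 1}, {6, 2}};
  std::vector<HistEntry> hist(4);
  // Only 2 entries are scanned for leaf 0, not all 8 data points.
  ConstructHistogram(ordered_entries, 0, 2, gradients, hessians, &hist);
  for (size_t b = 0; b < hist.size(); ++b) {
    std::printf("bin %zu: sum_grad=%.2f sum_hess=%.2f\n",
                b, hist[b].sum_gradients, hist[b].sum_hessians);
  }
  return 0;
}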