From 5c506acb39bcb75cf7098085fc3d9d643abd7f8d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 29 Feb 2024 15:57:13 +0800 Subject: [PATCH 1/2] Disable column sample by node for the exact tree method. The exact tree method grows by layers of nodes. --- doc/parameter.rst | 2 +- src/tree/updater_colmaker.cc | 18 +++++++++--------- tests/python/test_updaters.py | 18 ++++++++++++++++-- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/doc/parameter.rst b/doc/parameter.rst index 7898bb363549..e5cb13abfe7a 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -118,7 +118,7 @@ Parameters for Tree Booster - All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled. - ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed. - ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree. - - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. + - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method. - ``colsample_by*`` parameters work cumulatively. 
For instance, the combination ``{'colsample_bytree':0.5, 'colsample_bylevel':0.5, 'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index ef166fae5132..45018da17adc 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -106,6 +106,9 @@ class ColMaker: public TreeUpdater { if (dmat->Info().HasCategorical()) { LOG(FATAL) << error::NoCategorical("Updater `grow_colmaker` or `exact` tree method"); } + if (param->colsample_bynode - 1.0 != 0.0) { + LOG(FATAL) << "column sample by node is not yet supported by the exact tree method"; + } this->LazyGetColumnDensity(dmat); // rescale learning rate according to size of trees interaction_constraints_.Configure(*param, dmat->Info().num_row_); @@ -440,9 +443,8 @@ class ColMaker: public TreeUpdater { } // update the solution candidate - virtual void UpdateSolution(const SortedCSCPage &batch, - const std::vector &feat_set, - const std::vector &gpair, DMatrix *) { + void UpdateSolution(SortedCSCPage const &batch, const std::vector &feat_set, + const std::vector &gpair) { // start enumeration const auto num_features = feat_set.size(); CHECK(this->ctx_); @@ -466,17 +468,15 @@ class ColMaker: public TreeUpdater { } }); } + // find splits at current level, do split per level - inline void FindSplit(int depth, - const std::vector &qexpand, - const std::vector &gpair, - DMatrix *p_fmat, - RegTree *p_tree) { + void FindSplit(bst_node_t depth, const std::vector &qexpand, + std::vector const &gpair, DMatrix *p_fmat, RegTree *p_tree) { auto evaluator = tree_evaluator_.GetEvaluator(); auto feat_set = column_sampler_->GetFeatureSet(depth); for (const auto &batch : p_fmat->GetBatches(ctx_)) { - this->UpdateSolution(batch, feat_set->HostVector(), gpair, p_fmat); + this->UpdateSolution(batch, feat_set->HostVector(), gpair); } // after this each thread's stemp will get the best candidates, aggregate results 
this->SyncBestSolution(qexpand); diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index e7641348d98e..8ec1fdd9d395 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -35,10 +35,24 @@ class TestTreeMethod: def test_exact(self, param, num_rounds, dataset): if dataset.name.endswith("-l1"): return - param['tree_method'] = 'exact' + param["tree_method"] = "exact" param = dataset.set_params(param) result = train_result(param, dataset.get_dmat(), num_rounds) - assert tm.non_increasing(result['train'][dataset.metric]) + assert tm.non_increasing(result["train"][dataset.metric]) + + def test_exact_sample_by_node_error(self) -> None: + X, y, w = tm.make_regression(128, 12, False) + with pytest.raises(ValueError, match="column sample by node"): + xgb.train( + {"tree_method": "exact", "colsample_bynode": 0.999}, + xgb.DMatrix(X, y, weight=w), + ) + + xgb.train( + {"tree_method": "exact", "colsample_bynode": 1.0}, + xgb.DMatrix(X, y, weight=w), + num_boost_round=2, + ) @given( exact_parameter_strategy, From 77ac76fef73aed4f6f7e349c8c8d77dea771bae4 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 29 Feb 2024 16:21:18 +0800 Subject: [PATCH 2/2] disable feature weight test. --- R-package/tests/testthat/test_feature_weights.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_feature_weights.R b/R-package/tests/testthat/test_feature_weights.R index 4ed78c9b6cfe..54fec67cfcf5 100644 --- a/R-package/tests/testthat/test_feature_weights.R +++ b/R-package/tests/testthat/test_feature_weights.R @@ -25,7 +25,7 @@ test_that("training with feature weights works", { expect_lt(importance[1, Frequency], importance[9, Frequency]) } - for (tm in c("hist", "approx", "exact")) { + for (tm in c("hist", "approx")) { test(tm) } })