dmlc · trivialfis · Mar 21, 2022 · Aug 24, 2021 · Mar 20, 2022 · Mar 20, 2022
diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc
@@ -48,17 +48,18 @@
 #include "../src/predictor/cpu_predictor.cc"
 
 // trees
+#include "../src/tree/constraints.cc"
+#include "../src/tree/hist/param.cc"
 #include "../src/tree/param.cc"
 #include "../src/tree/tree_model.cc"
 #include "../src/tree/tree_updater.cc"
+#include "../src/tree/updater_approx.cc"
 #include "../src/tree/updater_colmaker.cc"
-#include "../src/tree/updater_quantile_hist.cc"
+#include "../src/tree/updater_histmaker.cc"
 #include "../src/tree/updater_prune.cc"
+#include "../src/tree/updater_quantile_hist.cc"
 #include "../src/tree/updater_refresh.cc"
 #include "../src/tree/updater_sync.cc"
-#include "../src/tree/updater_histmaker.cc"
-#include "../src/tree/updater_approx.cc"
-#include "../src/tree/constraints.cc"
 
 // linear
 #include "../src/linear/linear_updater.cc"

diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py
@@ -7,6 +7,9 @@
 
     .. versionadded:: 1.5.0
 
+
+See :doc:`the tutorial </tutorials/external_memory>` for more details.
+
 """
 import os
 import xgboost
@@ -77,9 +80,14 @@ def main(tmpdir: str) -> xgboost.Booster:
     missing = np.NaN
     Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
 
-    # Other tree methods including ``hist`` and ``gpu_hist`` also work, but has some
-    # caveats.  This is still an experimental feature.
-    booster = xgboost.train({"tree_method": "approx"}, Xy, evals=[(Xy, "Train")])
+    # Other tree methods including ``hist`` and ``gpu_hist`` also work; see the
+    # tutorial in the documentation for details.
+    booster = xgboost.train(
+        {"tree_method": "approx", "max_depth": 2},
+        Xy,
+        evals=[(Xy, "Train")],
+        num_boost_round=10,
+    )
     return booster
 
 

diff --git a/demo/guide-python/feature_weights.py b/demo/guide-python/feature_weights.py
@@ -27,7 +27,7 @@ def main(args):
     dtrain.set_info(feature_weights=fw)
 
     bst = xgboost.train({'tree_method': 'hist',
-                         'colsample_bynode': 0.5},
+                         'colsample_bynode': 0.2},
                         dtrain, num_boost_round=10,
                         evals=[(dtrain, 'd')])
     feature_map = bst.get_fscore()

diff --git a/doc/tutorials/external_memory.rst b/doc/tutorials/external_memory.rst
@@ -127,9 +127,12 @@ the tree method still concatenate all the chunks into 1 final histogram index du
 performance reason, but in compressed format.  So its scalability has an upper bound but
 still has lower memory cost in general.
 
-********
-CPU Hist
-********
-
-It's limited by the same factor of GPU Hist, except that gradient based sampling is not
-yet supported on CPU.
+***********
+CPU Version
+***********
+
+For CPU histogram-based tree methods (``approx``, ``hist``), it's recommended to use
+``grow_policy=depthwise`` for performance reasons.  Iterating over data batches is slow;
+with the ``depthwise`` policy XGBoost can build an entire layer of tree nodes in a few
+iterations, while with ``lossguide`` XGBoost needs to iterate over the data set for each
+tree node.