diff --git a/python-package/xgboost/training.py b/python-package/xgboost/training.py
index 34ad027d1e91..345603732341 100644
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -142,9 +142,20 @@ def _train_internal(params, dtrain,
         )
     else:
         raise ValueError(f'Unknown booster: {booster}')
-    num_groups = int(config['learner']['learner_model_param']['num_class'])
-    num_groups = 1 if num_groups == 0 else num_groups
-    bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree * num_groups
+
+    if bst.attr('best_score') is not None:
+        bst.best_score = float(bst.attr('best_score'))
+        bst.best_iteration = int(bst.attr('best_iteration'))
+        # num_class is handled internally
+        bst.set_attr(
+            best_ntree_limit=str((bst.best_iteration + 1) * num_parallel_tree)
+        )
+        bst.best_ntree_limit = int(bst.attr("best_ntree_limit"))
+    else:
+        # For compatibility with versions older than 1.4, these attributes are
+        # added to the Python object even if early stopping is not used.
+        bst.best_iteration = bst.num_boosted_rounds() - 1
+        bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
 
     # Copy to serialise and unserialise booster to reset state and free
     # training memory
@@ -184,9 +195,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
         If there's more than one metric in the **eval_metric** parameter given in
         **params**, the last metric will be used for early stopping.
         If early stopping occurs, the model will have three additional fields:
-        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
-        (Use ``bst.best_ntree_limit`` to get the correct value if
-        ``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
+        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use
+        ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree``
+        and/or ``num_class`` appears in the parameters. ``best_ntree_limit`` is the
+        result of ``num_parallel_tree * (best_iteration + 1)``.
     evals_result: dict
         This dictionary stores the evaluation results of all the items in watchlist.
 
diff --git a/tests/python/test_predict.py b/tests/python/test_predict.py
index a44eea916222..ef719bd47044 100644
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
     y = rng.randint(low=0, high=classes, size=rows)
     m = xgb.DMatrix(X, y)
     booster = xgb.train(
-        {'num_parallel_tree': num_parallel_tree, 'num_class': classes,
-         'predictor': predictor, 'tree_method': 'hist'}, m,
-        num_boost_round=num_boost_round)
+        {
+            "num_parallel_tree": num_parallel_tree,
+            "num_class": classes,
+            "predictor": predictor,
+            "tree_method": "hist",
+        },
+        m,
+        num_boost_round=num_boost_round,
+    )
 
     empty = xgb.DMatrix(np.ones(shape=(0, cols)))
     empty_leaf = booster.predict(empty, pred_leaf=True)
@@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
             end = classes * num_parallel_tree * (j + 1)
             layer = row[start: end]
             for c in range(classes):
-                tree_group = layer[c * num_parallel_tree:
-                                   (c+1) * num_parallel_tree]
+                tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
                 assert tree_group.shape[0] == num_parallel_tree
                 # no subsampling so tree in same forest should output same
                 # leaf.
                 assert np.all(tree_group == tree_group[0])
+
+    ntree_limit = 2
+    sliced = booster.predict(
+        m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
+    )
+    first = sliced[0, ...]
+
+    assert first.shape[0] == classes * num_parallel_tree * ntree_limit
 
     return leaf
 
diff --git a/tests/python/test_training_continuation.py b/tests/python/test_training_continuation.py
index 2c4e577d2316..9990ca61b05a 100644
--- a/tests/python/test_training_continuation.py
+++ b/tests/python/test_training_continuation.py
@@ -123,13 +123,13 @@ def run_training_continuation(self, xgb_params_01, xgb_params_02,
         gbdt_05 = xgb.train(xgb_params_03, dtrain_5class,
                             num_boost_round=7)
         assert gbdt_05.best_ntree_limit == (
-            gbdt_05.best_iteration + 1) * self.num_parallel_tree * 5
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree
 
         gbdt_05 = xgb.train(xgb_params_03, dtrain_5class,
                             num_boost_round=3, xgb_model=gbdt_05)
         assert gbdt_05.best_ntree_limit == (
-            gbdt_05.best_iteration + 1) * self.num_parallel_tree * 5
+            gbdt_05.best_iteration + 1) * self.num_parallel_tree
 
         res1 = gbdt_05.predict(dtrain_5class)
         res2 = gbdt_05.predict(dtrain_5class,
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index d4d121f12283..d2c90fb71b91 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -92,7 +92,7 @@ def train(booster, forest):
         )
 
         if forest:
-            assert cls.best_ntree_limit == rounds * forest * cls.n_classes_
+            assert cls.best_ntree_limit == rounds * forest
         else:
            assert cls.best_ntree_limit == 0
 
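
A minimal sketch of the semantics this patch establishes, for reviewers who want to
try it locally. The toy data, parameter values, and round count below are
illustrative assumptions, not taken from the patch itself; only the two asserted
formulas come from the diff above:

    import numpy as np
    import xgboost as xgb

    rng = np.random.RandomState(1994)
    X, y = rng.randn(100, 10), rng.randint(0, 3, size=100)  # 3 classes

    num_parallel_tree, rounds = 4, 5
    booster = xgb.train(
        {
            "num_parallel_tree": num_parallel_tree,
            "num_class": 3,
            "objective": "multi:softprob",
            "tree_method": "hist",
        },
        xgb.DMatrix(X, y),
        num_boost_round=rounds,
    )
    # Without early stopping, best_iteration now falls back to
    # num_boosted_rounds() - 1.
    assert booster.best_iteration == rounds - 1
    # num_class no longer multiplies into best_ntree_limit; it is
    # handled internally by the predictor.
    assert booster.best_ntree_limit == (booster.best_iteration + 1) * num_parallel_tree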