From 3303158ced5d6999936fcac77405af03f93341fe Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 3 Apr 2020 03:38:57 +0800 Subject: [PATCH 1/2] Enable parameter validation for skl. --- python-package/xgboost/core.py | 9 ++++++--- python-package/xgboost/sklearn.py | 21 +++++++++++++-------- tests/python/test_with_sklearn.py | 25 ++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 893ef361259b..688904ce1f34 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -1098,6 +1098,7 @@ def _init_from_array_interface(self, data, missing, nthread): ctypes.c_int(self.max_bin), ctypes.byref(handle))) self.handle = handle + class Booster(object): # pylint: disable=too-many-public-methods """A Booster of XGBoost. @@ -1129,10 +1130,12 @@ def __init__(self, params=None, cache=(), model_file=None): self.handle = ctypes.c_void_p() _check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)), ctypes.byref(self.handle))) + params = params or {} + if isinstance(params, list): + params.append(('validate_parameters', True)) + else: + params['validate_parameters'] = True - if isinstance(params, dict) and \ - 'validate_parameters' not in params.keys(): - params['validate_parameters'] = 1 self.set_param(params or {}) if (params is not None) and ('booster' in params): self.booster = params['booster'] diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3e20904c005e..8787f9a7aa22 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -210,7 +210,7 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100, missing=np.nan, num_parallel_tree=None, monotone_constraints=None, interaction_constraints=None, importance_type="gain", gpu_id=None, - validate_parameters=False, **kwargs): + validate_parameters=None, **kwargs): if not SKLEARN_INSTALLED: raise XGBoostError( 'sklearn needs to be installed in order to use this module') @@ -242,9 +242,6 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100, self.interaction_constraints = interaction_constraints self.importance_type = importance_type self.gpu_id = gpu_id - # Parameter validation is not working with Scikit-Learn interface, as - # it passes all paraemters into XGBoost core, whether they are used or - # not. self.validate_parameters = validate_parameters def get_booster(self): @@ -340,9 +337,16 @@ def parse_parameter(value): return params def get_xgb_params(self): - """Get xgboost type parameters.""" - xgb_params = self.get_params() - return xgb_params + """Get xgboost specific parameters.""" + params = self.get_params() + # Parameters that should not go into native learner. + wrapper_specific = { + 'importance_type', 'kwargs', 'missing', 'n_estimators'} + filtered = dict() + for k, v in params.items(): + if k not in wrapper_specific: + filtered[k] = v + return filtered def get_num_boosting_rounds(self): """Gets the number of xgboost boosting rounds.""" @@ -540,7 +544,8 @@ def fit(self, X, y, sample_weight=None, base_margin=None, if evals_result: for val in evals_result.items(): evals_result_key = list(val[1].keys())[0] - evals_result[val[0]][evals_result_key] = val[1][evals_result_key] + evals_result[val[0]][evals_result_key] = val[1][ + evals_result_key] self.evals_result_ = evals_result if early_stopping_rounds is not None: diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 0a58f3af5241..f59bb0e6057a 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -9,6 +9,8 @@ import unittest import json +from test_basic import captured_output + rng = np.random.RandomState(1994) pytestmark = pytest.mark.skipif(**tm.no_sklearn()) @@ -265,7 +267,7 @@ def test_parameter_tuning(): xgb_model = xgb.XGBRegressor(learning_rate=0.1) clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}, - cv=3, verbose=1, iid=True) + cv=3, verbose=1) clf.fit(X, y) assert clf.best_score_ < 0.7 assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4} @@ -785,6 +787,27 @@ def test_constraint_parameters(): 'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]' +def test_parameter_validation(): + reg = xgb.XGBRegressor(foo='bar') + X = np.random.randn(10, 10) + y = np.random.randn(10) + with captured_output() as (out, err): + reg.fit(X, y) + output = out.getvalue().strip() + + assert output.find('foo') != -1 + + reg = xgb.XGBRegressor(n_estimators=2, missing=3, + importance_type='gain') + X = np.random.randn(10, 10) + y = np.random.randn(10) + with captured_output() as (out, err): + reg.fit(X, y) + output = out.getvalue().strip() + + assert len(output) == 0 + + class TestBoostFromPrediction(unittest.TestCase): def run_boost_from_prediction(self, tree_method): from sklearn.datasets import load_breast_cancer From 326ab9b0b9c524ef315650baca5fbee712b5bf01 Mon Sep 17 00:00:00 2001 From: fis Date: Fri, 3 Apr 2020 04:13:34 +0800 Subject: [PATCH 2/2] Set verbosity in test. --- tests/python/test_with_sklearn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index f59bb0e6057a..4f1e8629c707 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -788,7 +788,7 @@ def test_constraint_parameters(): def test_parameter_validation(): - reg = xgb.XGBRegressor(foo='bar') + reg = xgb.XGBRegressor(foo='bar', verbosity=1) X = np.random.randn(10, 10) y = np.random.randn(10) with captured_output() as (out, err): @@ -798,7 +798,7 @@ def test_parameter_validation(): assert output.find('foo') != -1 reg = xgb.XGBRegressor(n_estimators=2, missing=3, - importance_type='gain') + importance_type='gain', verbosity=1) X = np.random.randn(10, 10) y = np.random.randn(10) with captured_output() as (out, err):