Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable parameter validation for skl. #5477

Merged
merged 2 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1098,6 +1098,7 @@ def _init_from_array_interface(self, data, missing, nthread):
ctypes.c_int(self.max_bin), ctypes.byref(handle)))
self.handle = handle


class Booster(object):
# pylint: disable=too-many-public-methods
"""A Booster of XGBoost.
Expand Down Expand Up @@ -1129,10 +1130,12 @@ def __init__(self, params=None, cache=(), model_file=None):
self.handle = ctypes.c_void_p()
_check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
ctypes.byref(self.handle)))
params = params or {}
if isinstance(params, list):
params.append(('validate_parameters', True))
else:
params['validate_parameters'] = True

if isinstance(params, dict) and \
'validate_parameters' not in params.keys():
params['validate_parameters'] = 1
self.set_param(params or {})
if (params is not None) and ('booster' in params):
self.booster = params['booster']
Expand Down
21 changes: 13 additions & 8 deletions python-package/xgboost/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
missing=np.nan, num_parallel_tree=None,
monotone_constraints=None, interaction_constraints=None,
importance_type="gain", gpu_id=None,
validate_parameters=False, **kwargs):
validate_parameters=None, **kwargs):
if not SKLEARN_INSTALLED:
raise XGBoostError(
'sklearn needs to be installed in order to use this module')
Expand Down Expand Up @@ -242,9 +242,6 @@ def __init__(self, max_depth=None, learning_rate=None, n_estimators=100,
self.interaction_constraints = interaction_constraints
self.importance_type = importance_type
self.gpu_id = gpu_id
# Parameter validation is not working with Scikit-Learn interface, as
it passes all parameters into XGBoost core, whether they are used or
# not.
self.validate_parameters = validate_parameters

def get_booster(self):
Expand Down Expand Up @@ -340,9 +337,16 @@ def parse_parameter(value):
return params

def get_xgb_params(self):
"""Get xgboost type parameters."""
xgb_params = self.get_params()
return xgb_params
"""Get xgboost specific parameters."""
params = self.get_params()
# Parameters that should not go into native learner.
wrapper_specific = {
'importance_type', 'kwargs', 'missing', 'n_estimators'}
filtered = dict()
for k, v in params.items():
if k not in wrapper_specific:
filtered[k] = v
return filtered

def get_num_boosting_rounds(self):
"""Gets the number of xgboost boosting rounds."""
Expand Down Expand Up @@ -540,7 +544,8 @@ def fit(self, X, y, sample_weight=None, base_margin=None,
if evals_result:
for val in evals_result.items():
evals_result_key = list(val[1].keys())[0]
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
evals_result[val[0]][evals_result_key] = val[1][
evals_result_key]
self.evals_result_ = evals_result

if early_stopping_rounds is not None:
Expand Down
25 changes: 24 additions & 1 deletion tests/python/test_with_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import unittest
import json

from test_basic import captured_output

rng = np.random.RandomState(1994)

pytestmark = pytest.mark.skipif(**tm.no_sklearn())
Expand Down Expand Up @@ -265,7 +267,7 @@ def test_parameter_tuning():
xgb_model = xgb.XGBRegressor(learning_rate=0.1)
clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
'n_estimators': [50, 100, 200]},
cv=3, verbose=1, iid=True)
cv=3, verbose=1)
clf.fit(X, y)
assert clf.best_score_ < 0.7
assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
Expand Down Expand Up @@ -785,6 +787,27 @@ def test_constraint_parameters():
'train_param']['interaction_constraints'] == '[[0, 1], [2, 3, 4]]'


def test_parameter_validation():
    # An unknown parameter ('foo') must trigger a validation warning
    # in xgboost's output.
    reg = xgb.XGBRegressor(foo='bar', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with captured_output() as (out, err):
        reg.fit(X, y)
    captured = out.getvalue().strip()

    assert 'foo' in captured

    # Wrapper-only parameters (n_estimators, missing, importance_type)
    # are filtered out before reaching the native learner, so no
    # validation warning should be emitted.
    reg = xgb.XGBRegressor(n_estimators=2, missing=3,
                           importance_type='gain', verbosity=1)
    X = np.random.randn(10, 10)
    y = np.random.randn(10)
    with captured_output() as (out, err):
        reg.fit(X, y)
    captured = out.getvalue().strip()

    assert not captured


class TestBoostFromPrediction(unittest.TestCase):
def run_boost_from_prediction(self, tree_method):
from sklearn.datasets import load_breast_cancer
Expand Down