From bc35b8e97b127b95574981a66ee91f28b53effa4 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Thu, 27 Sep 2018 15:03:05 -0700
Subject: [PATCH] Fix #3730: scikit-learn 0.20 compatibility fix (#3731)

* Fix #3730: scikit-learn 0.20 compatibility fix

sklearn.cross_validation has been removed from scikit-learn 0.20, so
replace it with sklearn.model_selection

* Display test names for Python tests for clarity
---
 tests/ci_build/test_gpu.sh              |  2 +-
 tests/python-gpu/test_gpu_prediction.py |  2 +-
 tests/python/test_with_sklearn.py       | 53 ++++++++++---------------
 tests/travis/run_test.sh                |  8 ++--
 4 files changed, 28 insertions(+), 37 deletions(-)

diff --git a/tests/ci_build/test_gpu.sh b/tests/ci_build/test_gpu.sh
index ee8c981097a1..7b11a64521a2 100755
--- a/tests/ci_build/test_gpu.sh
+++ b/tests/ci_build/test_gpu.sh
@@ -3,6 +3,6 @@
 cd python-package
 python setup.py install --user
 cd ..
-python -m nose --attr='!slow' tests/python-gpu/
+python -m nose -v --attr='!slow' tests/python-gpu/
 ./testxgboost
 
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
index 07e86d8de80f..db50cdb46d5a 100644
--- a/tests/python-gpu/test_gpu_prediction.py
+++ b/tests/python-gpu/test_gpu_prediction.py
@@ -49,7 +49,7 @@ def non_decreasing(self, L):
     # Test case for a bug where multiple batch predictions made on a test set produce incorrect results
     def test_multi_predict(self):
         from sklearn.datasets import make_regression
-        from sklearn.cross_validation import train_test_split
+        from sklearn.model_selection import train_test_split
 
         n = 1000
         X, y = make_regression(n, random_state=rng)
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 6fc2eaecb5bd..f0e624afb37e 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -9,21 +9,13 @@
 def test_binary_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
-    try:
-        from sklearn.model_selection import KFold
-    except:
-        from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    try:
-        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    except TypeError:  # sklearn.model_selection.KFold uses n_split
-        kf = KFold(
-            n_splits=2, shuffle=True, random_state=rng
-        ).split(np.arange(y.shape[0]))
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
         labels = y[test_index]
@@ -35,10 +27,7 @@
 def test_multiclass_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    try:
-        from sklearn.cross_validation import KFold
-    except:
-        from sklearn.model_selection import KFold
+    from sklearn.model_selection import KFold
 
     def check_pred(preds, labels):
         err = sum(1 for i in range(len(preds))
@@ -48,8 +37,8 @@
     iris = load_iris()
     y = iris['target']
     X = iris['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
         # test other params in XGBClassifier().fit
@@ -98,13 +87,13 @@ def test_boston_housing_regression():
     tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     boston = load_boston()
     y = boston['target']
     X = boston['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
 
@@ -122,7 +111,7 @@ def test_boston_housing_regression():
 
 def test_parameter_tuning():
     tm._skip_if_no_sklearn()
-    from sklearn.grid_search import GridSearchCV
+    from sklearn.model_selection import GridSearchCV
    from sklearn.datasets import load_boston
 
     boston = load_boston()
@@ -130,7 +119,8 @@ def test_parameter_tuning():
     X = boston['data']
     xgb_model = xgb.XGBRegressor()
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
-                                   'n_estimators': [50, 100, 200]}, verbose=1)
+                                   'n_estimators': [50, 100, 200]},
+                       cv=3, verbose=1, iid=True)
     clf.fit(X, y)
     assert clf.best_score_ < 0.7
     assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
@@ -140,7 +130,7 @@ def test_regression_with_custom_objective():
     tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     def objective_ls(y_true, y_pred):
         grad = (y_pred - y_true)
@@ -150,8 +140,8 @@ def objective_ls(y_true, y_pred):
     boston = load_boston()
     y = boston['target']
     X = boston['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
             X[train_index], y[train_index]
         )
@@ -173,7 +163,7 @@ def dummy_objective(y_true, y_pred):
 def test_classification_with_custom_objective():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     def logregobj(y_true, y_pred):
         y_pred = 1.0 / (1.0 + np.exp(-y_pred))
@@ -184,8 +174,8 @@ def logregobj(y_true, y_pred):
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier(objective=logregobj)
         xgb_model.fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
@@ -212,10 +202,11 @@ def dummy_objective(y_true, y_preds):
 def test_sklearn_api():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    from sklearn.cross_validation import train_test_split
+    from sklearn.model_selection import train_test_split
 
     iris = load_iris()
-    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
+    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target,
+                                              train_size=120, test_size=0.2)
     classifier = xgb.XGBClassifier(booster='gbtree', n_estimators=10)
     classifier.fit(tr_d, tr_l)
 
@@ -229,7 +220,7 @@
 def test_sklearn_api_gblinear():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    from sklearn.cross_validation import train_test_split
+    from sklearn.model_selection import train_test_split
 
     iris = load_iris()
     tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index 9844246d8e08..7877b016a555 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -48,7 +48,7 @@ if [ ${TASK} == "python_test" ]; then
     python --version
     conda install numpy scipy pandas matplotlib nose scikit-learn
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     py.test tests/python --cov=python-package/xgboost
     codecov
     source activate python2
@@ -56,7 +56,7 @@ if [ ${TASK} == "python_test" ]; then
     python --version
     conda install numpy scipy pandas matplotlib nose scikit-learn
     python -m pip install graphviz
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     exit 0
 fi
 
@@ -67,7 +67,7 @@ if [ ${TASK} == "python_lightweight_test" ]; then
     python --version
     conda install numpy scipy nose
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     py.test tests/python --cov=python-package/xgboost
     codecov
     source activate python2
@@ -75,7 +75,7 @@ if [ ${TASK} == "python_lightweight_test" ]; then
     python --version
     conda install numpy scipy nose
     python -m pip install graphviz
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     python -m pip install flake8==3.4.1
     flake8 --ignore E501 python-package || exit -1
     flake8 --ignore E501 tests/python || exit -1
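
A minimal sketch of the API migration applied above, for reference
(illustrative toy data only, not taken from the test suite). KFold,
GridSearchCV and train_test_split have lived in sklearn.model_selection
since scikit-learn 0.18; the old sklearn.cross_validation and
sklearn.grid_search modules are gone in 0.20:

    import numpy as np
    from sklearn.model_selection import KFold, train_test_split

    X = np.arange(20).reshape(10, 2)  # toy feature matrix
    y = np.arange(10) % 2             # toy binary labels

    # Old API: kf = KFold(n, n_folds=2) was itself iterable over the folds.
    # New API: the constructor takes n_splits, and split() yields the
    # (train_index, test_index) pairs.
    kf = KFold(n_splits=2, shuffle=True, random_state=0)
    for train_index, test_index in kf.split(X, y):
        print(train_index, test_index)

    # Giving test_size explicitly alongside train_size sidesteps the
    # FutureWarning that scikit-learn 0.20 emits when only train_size is set.
    tr_d, te_d, tr_l, te_l = train_test_split(X, y, train_size=6, test_size=4)

The cv=3, iid=True arguments added to GridSearchCV in test_parameter_tuning
follow the same idea: making the 0.20-era defaults explicit pins the
grid-search behaviour that the best_score_ and best_params_ assertions were
written against.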