Fix #3730: scikit-learn 0.20 compatibility fix (#3731)

* Fix #3730: scikit-learn 0.20 compatibility fix. sklearn.cross_validation has been removed from scikit-learn 0.20, so replace it with sklearn.model_selection.
* Display test names for Python tests for clarity.
dmlc · Sep 28, 2018 · bc35b8e · bc35b8e
1 parent b1233ef
commit bc35b8e
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 37 deletions.
diff --git a/tests/ci_build/test_gpu.sh b/tests/ci_build/test_gpu.sh
@@ -3,6 +3,6 @@
 cd python-package
 python setup.py install --user
 cd ..
-python -m nose --attr='!slow' tests/python-gpu/
+python -m nose -v --attr='!slow' tests/python-gpu/
 ./testxgboost
 
diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py
@@ -49,7 +49,7 @@ def non_decreasing(self, L):
     # Test case for a bug where multiple batch predictions made on a test set produce incorrect results
     def test_multi_predict(self):
         from sklearn.datasets import make_regression
-        from sklearn.cross_validation import train_test_split
+        from sklearn.model_selection import train_test_split
 
         n = 1000
         X, y = make_regression(n, random_state=rng)

diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
@@ -9,21 +9,13 @@
 def test_binary_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
-    try:
-        from sklearn.model_selection import KFold
-    except:
-        from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    try:
-        kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    except TypeError:  # sklearn.model_selection.KFold uses n_split
-        kf = KFold(
-            n_splits=2, shuffle=True, random_state=rng
-        ).split(np.arange(y.shape[0]))
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
         labels = y[test_index]
@@ -35,10 +27,7 @@ def test_binary_classification():
 def test_multiclass_classification():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    try:
-        from sklearn.cross_validation import KFold
-    except:
-        from sklearn.model_selection import KFold
+    from sklearn.model_selection import KFold
 
     def check_pred(preds, labels):
         err = sum(1 for i in range(len(preds))
@@ -48,8 +37,8 @@ def check_pred(preds, labels):
     iris = load_iris()
     y = iris['target']
     X = iris['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
         # test other params in XGBClassifier().fit
@@ -98,13 +87,13 @@ def test_boston_housing_regression():
     tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     boston = load_boston()
     y = boston['target']
     X = boston['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
 
         preds = xgb_model.predict(X[test_index])
@@ -122,15 +111,16 @@ def test_boston_housing_regression():
 
 def test_parameter_tuning():
     tm._skip_if_no_sklearn()
-    from sklearn.grid_search import GridSearchCV
+    from sklearn.model_selection import GridSearchCV
     from sklearn.datasets import load_boston
 
     boston = load_boston()
     y = boston['target']
     X = boston['data']
     xgb_model = xgb.XGBRegressor()
     clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6],
-                                   'n_estimators': [50, 100, 200]}, verbose=1)
+                                   'n_estimators': [50, 100, 200]},
+                       cv=3, verbose=1, iid=True)
     clf.fit(X, y)
     assert clf.best_score_ < 0.7
     assert clf.best_params_ == {'n_estimators': 100, 'max_depth': 4}
@@ -140,7 +130,7 @@ def test_regression_with_custom_objective():
     tm._skip_if_no_sklearn()
     from sklearn.metrics import mean_squared_error
     from sklearn.datasets import load_boston
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     def objective_ls(y_true, y_pred):
         grad = (y_pred - y_true)
@@ -150,8 +140,8 @@ def objective_ls(y_true, y_pred):
     boston = load_boston()
     y = boston['target']
     X = boston['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
             X[train_index], y[train_index]
         )
@@ -173,7 +163,7 @@ def dummy_objective(y_true, y_pred):
 def test_classification_with_custom_objective():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_digits
-    from sklearn.cross_validation import KFold
+    from sklearn.model_selection import KFold
 
     def logregobj(y_true, y_pred):
         y_pred = 1.0 / (1.0 + np.exp(-y_pred))
@@ -184,8 +174,8 @@ def logregobj(y_true, y_pred):
     digits = load_digits(2)
     y = digits['target']
     X = digits['data']
-    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
-    for train_index, test_index in kf:
+    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
+    for train_index, test_index in kf.split(X, y):
         xgb_model = xgb.XGBClassifier(objective=logregobj)
         xgb_model.fit(X[train_index], y[train_index])
         preds = xgb_model.predict(X[test_index])
@@ -212,10 +202,11 @@ def dummy_objective(y_true, y_preds):
 def test_sklearn_api():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    from sklearn.cross_validation import train_test_split
+    from sklearn.model_selection import train_test_split
 
     iris = load_iris()
-    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)
+    tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target,
+                                              train_size=120, test_size=0.2)
 
     classifier = xgb.XGBClassifier(booster='gbtree', n_estimators=10)
     classifier.fit(tr_d, tr_l)
@@ -229,7 +220,7 @@ def test_sklearn_api():
 def test_sklearn_api_gblinear():
     tm._skip_if_no_sklearn()
     from sklearn.datasets import load_iris
-    from sklearn.cross_validation import train_test_split
+    from sklearn.model_selection import train_test_split
 
     iris = load_iris()
     tr_d, te_d, tr_l, te_l = train_test_split(iris.data, iris.target, train_size=120)

diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
@@ -48,15 +48,15 @@ if [ ${TASK} == "python_test" ]; then
     python --version
     conda install numpy scipy pandas matplotlib nose scikit-learn
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     py.test tests/python --cov=python-package/xgboost
     codecov
     source activate python2
     echo "-------------------------------"
     python --version
     conda install numpy scipy pandas matplotlib nose scikit-learn
     python -m pip install graphviz
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     exit 0
 fi
 
@@ -67,15 +67,15 @@ if [ ${TASK} == "python_lightweight_test" ]; then
     python --version
     conda install numpy scipy nose
     python -m pip install graphviz pytest pytest-cov codecov
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     py.test tests/python --cov=python-package/xgboost
     codecov
     source activate python2
     echo "-------------------------------"
     python --version
     conda install numpy scipy nose
     python -m pip install graphviz
-    python -m nose tests/python || exit -1
+    python -m nose -v tests/python || exit -1
     python -m pip install flake8==3.4.1
     flake8 --ignore E501 python-package || exit -1
     flake8 --ignore E501 tests/python || exit -1