microsoft · bartlesy · Aug 23, 2019
@@ -525,7 +525,7 @@ def fit(self, X, y,
             eval_metric = [eval_metric] if isinstance(eval_metric, (string_type, type(None))) else eval_metric
             params['metric'] = set(original_metric + eval_metric)
 
-        if not isinstance(X, (DataFrame, DataTable)):
+        if not isinstance(X, DataTable):
             _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
             _LGBMCheckConsistentLength(_X, _y, sample_weight)
         else:

@@ -298,6 +298,35 @@ def test_pandas_sparse(self):
             pred_dense = gbm.predict(X_test.to_dense(), raw_score=True)
         np.testing.assert_allclose(pred_sparse, pred_dense)
 
+    @unittest.skipIf(not lgb.compat.PANDAS_INSTALLED, 'pandas is not installed')
+    def test_nan_y_pd(self):
+        """Check that fit raises ValueError when X is a DataFrame and y is an all-NaN Series."""
+        import pandas as pd
+
+        nrows = 1000
+        ncols = 10
+        X = pd.DataFrame(np.random.randn(nrows, ncols))
+        y = pd.Series(np.full(nrows, np.nan))
+        params = {'n_estimators': 20, 'verbose': -1}
+        params_fit = {'X': X, 'y': y}
+        gbm = lgb.LGBMRegressor(**params)
+        self.assertRaises(ValueError, gbm.fit, **params_fit)
+
+    @unittest.skipIf(not lgb.compat.PANDAS_INSTALLED, 'pandas is not installed')
+    def test_nan_handle_pd(self):
+        """Fit on pandas inputs with targets near 1e30 and all-zero weights; assert training l2 is NaN."""
+        import pandas as pd
+
+        n_samples, n_features = 1000, 10
+        features = pd.DataFrame(np.random.randn(n_samples, n_features))
+        target = pd.Series(np.random.randn(n_samples) + np.full(n_samples, 1e30))
+        zero_weight = np.zeros(n_samples)
+        regressor = lgb.LGBMRegressor(n_estimators=20, verbose=-1)
+        regressor.fit(X=features, y=target, sample_weight=zero_weight, eval_set=(features, target),
+                      verbose=False, early_stopping_rounds=5)
+        training_l2 = regressor.evals_result_['training']['l2']
+        np.testing.assert_allclose(training_l2, np.nan)
+
     def test_predict(self):
         iris = load_iris()
         X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
@@ -638,6 +667,17 @@ def test_nan_handle(self):
         gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
         np.testing.assert_allclose(gbm.evals_result_['training']['l2'], np.nan)
 
+    def test_nan_y_np(self):
+        """Check that fit raises ValueError when X is a numpy array and y is all NaN."""
+        nrows = 1000
+        ncols = 10
+        X = np.random.randn(nrows, ncols)
+        y = np.full(nrows, np.nan)
+        params = {'n_estimators': 20, 'verbose': -1}
+        params_fit = {'X': X, 'y': y}
+        gbm = lgb.LGBMRegressor(**params)
+        self.assertRaises(ValueError, gbm.fit, **params_fit)
+
     def test_class_weight(self):
         X, y = load_digits(10, True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)