Merge pull request #108 from Borda/gpu-flag
enable running XGBoost on GPU with `GPU_flag`
AutoViML authored Jan 27, 2024
2 parents 7ac9e54 + c47f07f commit 5901369
Showing 3 changed files with 19 additions and 8 deletions.
12 changes: 9 additions & 3 deletions auto_ts/models/build_ml.py
@@ -85,7 +85,13 @@ def __init__(self, scoring: str = '', forecast_period: int = 2, ts_column: str =
         self.train_df = pd.DataFrame()
 
 
-    def fit(self, ts_df: pd.DataFrame, target_col: str, ts_column:str, cv: Optional[int]=None, lags: int = 0):
+    def fit(self,
+            ts_df: pd.DataFrame,
+            target_col: str,
+            ts_column: str,
+            cv: Optional[int] = None,
+            lags: int = 0,
+            GPU_flag: bool = False):
         """
         Build a Time Series Model using Machine Learning models.
         Quickly builds and runs multiple models for a clean data set (only numerics).
@@ -306,8 +312,8 @@ def fit(self, ts_df: pd.DataFrame, target_col: str, ts_column:str, cv: Optional[
 
                 model_name = 'XGBoost'
                 print('### Number of booster rounds = %s for XGBoost which can be set during setup ####' %self.num_boost_rounds)
-                outputs = complex_XGBoost_model(X_train_fold,y_train_fold,
-                            X_test_fold, log_y=False, GPU_flag=False,
+                outputs = complex_XGBoost_model(X_train_fold, y_train_fold,
+                            X_test_fold, log_y=False, GPU_flag=GPU_flag,
                             scaler='', enc_method='', n_splits=cv_in,
                             num_boost_round=self.num_boost_rounds, verbose=0)
                 print('XGBoost model tuning completed')
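To exercise the new flag end to end, something like the following sketch should work (the class name `BuildML`, the CSV path, and the column names are assumptions for illustration, not taken from this diff):

    import pandas as pd
    from auto_ts.models.build_ml import BuildML  # assumed class holding the fit() shown above

    df = pd.read_csv("sales.csv", parse_dates=["date"])  # illustrative dataset
    ml = BuildML(scoring="rmse", forecast_period=2)
    # New in this commit: GPU_flag is forwarded to complex_XGBoost_model.
    ml.fit(ts_df=df, target_col="sales", ts_column="date",
           cv=5, lags=7, GPU_flag=True)
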
11 changes: 7 additions & 4 deletions auto_ts/models/ml_models.py
@@ -9,6 +9,7 @@
 from sklearn.model_selection import KFold, cross_val_score, StratifiedKFold
 import seaborn as sns
 import re
+import GPUtil
 from xgboost import XGBRegressor, XGBClassifier
 from sklearn.metrics import mean_squared_log_error, mean_squared_error, balanced_accuracy_score
 from scipy import stats
@@ -31,9 +32,10 @@
 from ..utils import My_LabelEncoder, My_LabelEncoder_Pipe
 from ..utils import left_subtract
 #################################################################################
-def complex_XGBoost_model(X_train, y_train, X_test, log_y=False, GPU_flag=False,
-                        scaler = '', enc_method='label', n_splits=5,
-                        num_boost_round=1000, verbose=-1):
+def complex_XGBoost_model(
+        X_train, y_train, X_test, log_y=False, GPU_flag=False,
+        scaler='', enc_method='label', n_splits=5,
+        num_boost_round=1000, verbose=-1):
     """
     This model is called complex because it handles multi-label, multi-class datasets which XGBoost ordinarily can't.
     Just send in X_train, y_train and what you want to predict, X_test
@@ -90,7 +92,7 @@ def complex_XGBoost_model(X_train, y_train, X_test, log_y=False, GPU_flag=False,
     #########  G P U  P R O C E S S I N G  B E G I N S  ############
     ######  This is where we set the CPU and GPU parameters for XGBoost
     if GPU_flag:
-        GPU_exists = check_if_GPU_exists()
+        GPU_exists = len(GPUtil.getAvailable()) > 0
     else:
         GPU_exists = False
     ##### Set the Scoring Parameters here based on each model and preferences of user ###
@@ -101,6 +103,7 @@
     cpu_params['updater'] = 'grow_colmaker'
     cpu_params['predictor'] = 'cpu_predictor'
     if GPU_exists:
+        param['device'] = "cuda"
         param['tree_method'] = 'gpu_hist'
         param['gpu_id'] = 0
         param['updater'] = 'grow_gpu_hist' #'prune'
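Condensed, the GPU branch above now behaves roughly like this sketch (`resolve_xgb_params` is a hypothetical helper, not in the codebase; `GPUtil.getAvailable()` returns the ids of GPUs below its default load/memory thresholds, so an empty list means no usable GPU). Note that `device = "cuda"` is how XGBoost >= 2.0 selects the GPU, while `tree_method='gpu_hist'` is the deprecated 1.x spelling the commit keeps alongside it:

    import GPUtil

    def resolve_xgb_params(GPU_flag: bool) -> dict:
        # Hypothetical distillation of the branch in complex_XGBoost_model.
        gpu_exists = GPU_flag and len(GPUtil.getAvailable()) > 0
        if gpu_exists:
            # XGBoost >= 2.0 reads 'device'; 'gpu_hist' and 'gpu_id' are legacy 1.x params.
            return {"device": "cuda", "tree_method": "gpu_hist", "gpu_id": 0}
        # CPU fallback mirroring cpu_params above.
        return {"updater": "grow_colmaker", "predictor": "cpu_predictor"}
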
4 changes: 3 additions & 1 deletion requirements.txt
@@ -13,9 +13,11 @@ pandas
 xlrd
 scipy
 prettytable
-xgboost>=1.5.1
+xgboost>=2.0.0  # with GPU support
+GPUtil
 dask>=2022.2.0
 distributed>=2022.2.0
+GPUtil>=1.4.0
 pyyaml>=5.4.1
 
 # Viz libs
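A quick sanity check that the updated requirements are satisfied at runtime (a sketch; both calls are standard xgboost/GPUtil APIs):

    import xgboost
    import GPUtil

    print(xgboost.__version__)    # expect >= 2.0.0 so the 'device' parameter is honored
    print(GPUtil.getAvailable())  # non-empty list of GPU ids when a CUDA device is free
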
