From ec207a43893be78906fdd75118b5a0b6d599944c Mon Sep 17 00:00:00 2001 From: fis Date: Wed, 13 Jul 2022 13:25:26 +0800 Subject: [PATCH] [doc] Integrate pyspark module into sphinx doc [skip ci] --- doc/conf.py | 3 ++- doc/python/python_api.rst | 26 ++++++++++++++++++++++ doc/requirements.txt | 4 +++- python-package/xgboost/spark/estimator.py | 27 ++++++++++++++--------- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 53b2ba503915..7ebd0c2af999 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -207,10 +207,11 @@ "python": ("https://docs.python.org/3.6", None), "numpy": ("https://docs.scipy.org/doc/numpy/", None), "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), - "pandas": ("http://pandas-docs.github.io/pandas-docs-travis/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "sklearn": ("https://scikit-learn.org/stable", None), "dask": ("https://docs.dask.org/en/stable/", None), "distributed": ("https://distributed.dask.org/en/stable/", None), + "pyspark": ("https://spark.apache.org/docs/latest/api/python/", None), } diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index 9f077edbc0df..33be6e6275e0 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -147,3 +147,29 @@ Dask API :members: :inherited-members: :show-inheritance: + + +PySpark API +----------- + +.. automodule:: xgboost.spark + +.. autoclass:: xgboost.spark.SparkXGBClassifier + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBClassifierModel + :members: + :inherited-members: + :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRegressor + :members: + :inherited-members: + :show-inheritance: + +.. 
autoclass:: xgboost.spark.SparkXGBRegressorModel + :members: + :inherited-members: + :show-inheritance: diff --git a/doc/requirements.txt b/doc/requirements.txt index fed325a3fc26..0330814ab520 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -9,4 +9,6 @@ graphviz numpy recommonmark xgboost_ray -sphinx-gallery \ No newline at end of file +sphinx-gallery +pyspark +cloudpickle \ No newline at end of file diff --git a/python-package/xgboost/spark/estimator.py b/python-package/xgboost/spark/estimator.py index 664d7c06182e..1af42c3ae120 100644 --- a/python-package/xgboost/spark/estimator.py +++ b/python-package/xgboost/spark/estimator.py @@ -15,12 +15,13 @@ class SparkXGBRegressor(_SparkXGBEstimator): """ SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression algorithm based on XGBoost python library, and it can be used in PySpark Pipeline - and PySpark ML meta algorithms like CrossValidator/TrainValidationSplit/OneVsRest. + and PySpark ML meta algorithms like :py:class:`~pyspark.ml.tuning.CrossValidator`/ + :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/ + :py:class:`~pyspark.ml.classification.OneVsRest`. SparkXGBRegressor automatically supports most of the parameters in `xgboost.XGBRegressor` constructor and most of the parameters used in - `xgboost.XGBRegressor` fit and predict method (see `API docs `_ for details). + :py:class:`xgboost.XGBRegressor` fit and predict methods. SparkXGBRegressor doesn't support setting `gpu_id` but support another param `use_gpu`, see doc below for more details. @@ -65,7 +66,8 @@ class SparkXGBRegressor(_SparkXGBEstimator): .. Note:: This API is experimental. - **Examples** + Examples + -------- >>> from xgboost.spark import SparkXGBRegressor >>> from pyspark.ml.linalg import Vectors @@ -104,15 +106,16 @@ def _pyspark_model_cls(cls): class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPredictionCol): - """ - SparkXGBClassifier is a PySpark ML estimator. 
It implements the XGBoost classification - algorithm based on XGBoost python library, and it can be used in PySpark Pipeline - and PySpark ML meta algorithms like CrossValidator/TrainValidationSplit/OneVsRest. + """SparkXGBClassifier is a PySpark ML estimator. It implements the XGBoost + classification algorithm based on XGBoost python library, and it can be used in + PySpark Pipeline and PySpark ML meta algorithms like + :py:class:`~pyspark.ml.tuning.CrossValidator`/ + :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/ + :py:class:`~pyspark.ml.classification.OneVsRest`. SparkXGBClassifier automatically supports most of the parameters in `xgboost.XGBClassifier` constructor and most of the parameters used in - `xgboost.XGBClassifier` fit and predict method (see `API docs `_ for details). + :py:class:`xgboost.XGBClassifier` fit and predict methods. SparkXGBClassifier doesn't support setting `gpu_id` but support another param `use_gpu`, see doc below for more details. @@ -127,6 +130,7 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction Parameters ---------- + callbacks: The export and import of the callback functions are at best effort. For details, see :py:attr:`xgboost.spark.SparkXGBClassifier.callbacks` param doc. @@ -166,7 +170,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction .. Note:: This API is experimental. - **Examples** + Examples + -------- >>> from xgboost.spark import SparkXGBClassifier >>> from pyspark.ml.linalg import Vectors