Add doctest (rapidsai#4618)
Continuation of rapidsai#2975, converting docstrings to doctest format.
I'm also adding a pytest file to execute the doctests (sketched below), similar to what's done in [cudf](https://github.com/rapidsai/cudf/blob/branch-22.04/python/cudf/cudf/tests/test_doctests.py).
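
For reference, a minimal sketch of what such a pytest-driven doctest runner can look like, modeled on the cudf file linked above; the module discovery and test names here are illustrative assumptions, not necessarily the exact file this PR adds:

```python
# Illustrative sketch of a pytest-based doctest runner for cuml.
# Assumption: the real file added by this PR may discover modules
# and filter examples differently.
import doctest
import inspect

import pytest

import cuml


def _find_doctests(module):
    """Yield every doctest with examples found on the module's members."""
    finder = doctest.DocTestFinder()
    for _, member in inspect.getmembers(module):
        # Only walk classes and functions, as the cudf runner does.
        if not (inspect.isclass(member) or inspect.isfunction(member)):
            continue
        for test in finder.find(member):
            if test.examples:  # skip docstrings without >>> examples
                yield test


@pytest.mark.parametrize(
    "docstring_test", _find_doctests(cuml), ids=lambda t: t.name
)
def test_docstring(docstring_test):
    # Run each collected doctest and fail the test on any mismatch.
    runner = doctest.DocTestRunner(verbose=False)
    runner.run(docstring_test)
    results = runner.summarize()
    assert results.failed == 0
```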

Authors:
  - Micka (https://github.com/lowener)
  - Yuqiong Li (https://github.com/yuqli)
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: rapidsai#4618
lowener authored Mar 29, 2022
1 parent 8f30123 commit f7570fb
Showing 91 changed files with 2,157 additions and 1,969 deletions.
59 changes: 59 additions & 0 deletions python/cuml/__init__.py
@@ -111,3 +111,62 @@ def __getattr__(name):
return _global_settings_data.settings

raise AttributeError(f"module {__name__} has no attribute {name}")


+__all__ = [
+    # Modules
+    "common",
+    "metrics",
+    "multiclass",
+    "naive_bayes",
+    "preprocessing",
+    # Classes
+    "AgglomerativeClustering",
+    "ARIMA",
+    "AutoARIMA",
+    "Base",
+    "CD",
+    "cuda",
+    "DBSCAN",
+    "ElasticNet",
+    "ExponentialSmoothing",
+    "ForestInference",
+    "GaussianRandomProjection",
+    "Handle",
+    "HDBSCAN",
+    "IncrementalPCA",
+    "KernelDensity",
+    "KernelExplainer",
+    "KernelRidge",
+    "KMeans",
+    "KNeighborsClassifier",
+    "KNeighborsRegressor",
+    "Lasso",
+    "LinearRegression",
+    "LinearSVC",
+    "LinearSVR",
+    "LogisticRegression",
+    "MBSGDClassifier",
+    "MBSGDRegressor",
+    "NearestNeighbors",
+    "PCA",
+    "PermutationExplainer",
+    "QN",
+    "RandomForestClassifier",
+    "RandomForestRegressor",
+    "Ridge",
+    "SGD",
+    "SparseRandomProjection",
+    "SVC",
+    "SVR",
+    "TruncatedSVD",
+    "TSNE",
+    "UMAP",
+    # Functions
+    "johnson_lindenstrauss_min_dim",
+    "make_arima",
+    "make_blobs",
+    "make_classification",
+    "make_regression",
+    "stationarity",
+]
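
An explicit `__all__` plausibly gives the new doctest runner (and `from cuml import *`) a well-defined public surface to enumerate; that motivation is my inference rather than something stated in the diff. A trivial check of the new attribute:

```python
>>> import cuml
>>> "KMeans" in cuml.__all__
True
```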
21 changes: 17 additions & 4 deletions python/cuml/_thirdparty/sklearn/preprocessing/_data.py
@@ -258,7 +258,9 @@ class MinMaxScaler(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import MinMaxScaler
+>>> import cupy as cp
>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
+>>> data = cp.array(data)
>>> scaler = MinMaxScaler()
>>> print(scaler.fit(data))
MinMaxScaler()
@@ -269,7 +271,7 @@ class MinMaxScaler(TransformerMixin,
[0.25 0.25]
[0.5 0.5 ]
[1. 1. ]]
->>> print(scaler.transform([[2, 2]]))
+>>> print(scaler.transform(cp.array([[2, 2]])))
[[1.5 0. ]]
See also
@@ -577,7 +579,9 @@ class StandardScaler(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import StandardScaler
+>>> import cupy as cp
>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]
+>>> data = cp.array(data)
>>> scaler = StandardScaler()
>>> print(scaler.fit(data))
StandardScaler()
@@ -588,7 +592,7 @@ class StandardScaler(TransformerMixin,
[-1. -1.]
[ 1. 1.]
[ 1. 1.]]
->>> print(scaler.transform([[2, 2]]))
+>>> print(scaler.transform(cp.array([[2, 2]])))
[[3. 3.]]
See also
@@ -649,7 +653,7 @@ def fit(self, X, y=None) -> "StandardScaler":
The data used to compute the mean and standard deviation
used for later scaling along the features axis.
-y
+y : None
Ignored
"""

@@ -893,9 +897,11 @@ class MaxAbsScaler(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import MaxAbsScaler
+>>> import cupy as cp
>>> X = [[ 1., -1., 2.],
... [ 2., 0., 0.],
... [ 0., 1., -1.]]
+>>> X = cp.array(X)
>>> transformer = MaxAbsScaler().fit(X)
>>> transformer
MaxAbsScaler()
@@ -1151,9 +1157,11 @@ class RobustScaler(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import RobustScaler
+>>> import cupy as cp
>>> X = [[ 1., -2., 2.],
... [ -2., 1., 3.],
... [ 4., 1., -2.]]
+>>> X = cp.array(X)
>>> transformer = RobustScaler().fit(X)
>>> transformer
RobustScaler()
@@ -1786,9 +1794,11 @@ class Normalizer(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import Normalizer
+>>> import cupy as cp
>>> X = [[4, 1, 2, 2],
... [1, 3, 9, 3],
... [5, 7, 5, 1]]
+>>> X = cp.array(X)
>>> transformer = Normalizer().fit(X) # fit does nothing.
>>> transformer
Normalizer()
@@ -1913,9 +1923,11 @@ class Binarizer(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import Binarizer
+>>> import cupy as cp
>>> X = [[ 1., -1., 2.],
... [ 2., 0., 0.],
... [ 0., 1., -1.]]
+>>> X = cp.array(X)
>>> transformer = Binarizer().fit(X) # fit does nothing.
>>> transformer
Binarizer()
@@ -1996,7 +2008,8 @@ def add_dummy_feature(X, value=1.0):
--------
>>> from cuml.preprocessing import add_dummy_feature
->>> add_dummy_feature([[0, 1], [1, 0]])
+>>> import cupy as cp
+>>> add_dummy_feature(cp.array([[0, 1], [1, 0]]))
array([[1., 0., 1.],
[1., 1., 0.]])
"""
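The recurring edit throughout the hunks above wraps plain Python lists in CuPy arrays before fitting; my reading, stated as an assumption, is that these GPU ports of the scikit-learn preprocessors expect device arrays rather than nested lists. A minimal sketch of the pattern:

```python
# Sketch of the list -> device-array pattern used throughout this diff.
# Values mirror the MinMaxScaler docstring above; the output comment is
# expected behavior, not a captured run.
import cupy as cp
from cuml.preprocessing import MinMaxScaler

data = cp.array([[-1.0, 2.0], [-0.5, 6.0], [0.0, 10.0], [1.0, 18.0]])
scaled = MinMaxScaler().fit_transform(data)  # result stays on the GPU
print(scaled.min(), scaled.max())            # 0.0 1.0 after scaling
```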
13 changes: 8 additions & 5 deletions python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py
@@ -103,19 +103,22 @@ class KBinsDiscretizer(TransformerMixin,
Examples
--------
>>> from cuml.preprocessing import KBinsDiscretizer
+>>> import numpy as np
>>> X = [[-2, 1, -4, -1],
... [-1, 2, -3, -0.5],
... [ 0, 3, -2, 0.5],
... [ 1, 4, -1, 2]]
+>>> X = np.array(X)
>>> est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
>>> est.fit(X)
KBinsDiscretizer(...)
>>> Xt = est.transform(X)
->>> Xt # doctest: +SKIP
-array([[ 0., 0., 0., 0.],
-       [ 1., 1., 1., 0.],
-       [ 2., 2., 2., 1.],
-       [ 2., 2., 2., 2.]])
+>>> Xt
+array([[0, 0, 0, 0],
+       [1, 1, 1, 0],
+       [2, 2, 2, 1],
+       [2, 2, 2, 2]], dtype=int32)
Sometimes it may be useful to convert the data back into the original
feature space. The ``inverse_transform`` function converts the binned
data into the original feature space.
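For the fit shown in this hunk (uniform strategy, three bins per feature), the round trip maps each ordinal code back to the center of its bin. A sketch continuing the doctest above, with output computed by hand from the bin edges, so treat it as illustrative rather than a captured run:

```python
>>> # Continuing `est` and `Xt` from the example above (hand-computed
>>> # output; feature 0 has edges [-2, -1, 0, 1], centers -1.5/-0.5/0.5)
>>> est.inverse_transform(Xt)
array([[-1.5,  1.5, -3.5, -0.5],
       [-0.5,  2.5, -2.5, -0.5],
       [ 0.5,  3.5, -1.5,  0.5],
       [ 0.5,  3.5, -1.5,  1.5]])
```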
43 changes: 20 additions & 23 deletions python/cuml/cluster/dbscan.pyx
@@ -111,29 +111,26 @@ class DBSCAN(Base,
-.. code-block:: python
-
-    # Both import methods supported
-    from cuml import DBSCAN
-    from cuml.cluster import DBSCAN
-
-    import cudf
-    import numpy as np
-
-    gdf_float = cudf.DataFrame()
-    gdf_float['0'] = np.asarray([1.0,2.0,5.0], dtype = np.float32)
-    gdf_float['1'] = np.asarray([4.0,2.0,1.0], dtype = np.float32)
-    gdf_float['2'] = np.asarray([4.0,2.0,1.0], dtype = np.float32)
-
-    dbscan_float = DBSCAN(eps = 1.0, min_samples = 1)
-    dbscan_float.fit(gdf_float)
-    print(dbscan_float.labels_)
-
-Output:
-
-.. code-block:: python
-
-    0 0
-    1 1
-    2 2
+>>> # Both import methods supported
+>>> from cuml import DBSCAN
+>>> from cuml.cluster import DBSCAN
+>>>
+>>> import cudf
+>>> import numpy as np
+>>>
+>>> gdf_float = cudf.DataFrame()
+>>> gdf_float['0'] = np.asarray([1.0,2.0,5.0], dtype = np.float32)
+>>> gdf_float['1'] = np.asarray([4.0,2.0,1.0], dtype = np.float32)
+>>> gdf_float['2'] = np.asarray([4.0,2.0,1.0], dtype = np.float32)
+>>>
+>>> dbscan_float = DBSCAN(eps = 1.0, min_samples = 1)
+>>> dbscan_float.fit(gdf_float)
+DBSCAN()
+>>> dbscan_float.labels_
+0    0
+1    1
+2    2
+dtype: int32
Parameters
-----------
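As an aside, the same labels are available in one step through `fit_predict`; a sketch continuing the example above (not part of this diff):

```python
>>> # Sketch (not in this diff): one-step clustering of the same frame
>>> DBSCAN(eps = 1.0, min_samples = 1).fit_predict(gdf_float)
0    0
1    1
2    2
dtype: int32
```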
2 changes: 1 addition & 1 deletion python/cuml/cluster/hdbscan.pyx
@@ -323,7 +323,7 @@ class HDBSCAN(Base, ClusterMixin, CMajorInputTagMixin):
for new points in future (e.g. using approximate_predict), as
the approximate_predict function is not aware of this argument.
-metric : string or callable, optional (default='minkowski')
+metric : string or callable, optional (default='euclidean')
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
the options allowed by metrics.pairwise.pairwise_distances for its
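This hunk only corrects the documented default; per the corrected docstring, HDBSCAN computes euclidean distances unless told otherwise. A hedged usage sketch (data and parameters are illustrative):

```python
# Sketch: rely on the (now correctly documented) euclidean default.
import cupy as cp
from cuml.cluster import HDBSCAN

X = cp.random.standard_normal((100, 2)).astype(cp.float32)
clusterer = HDBSCAN(min_samples=5)  # metric defaults to 'euclidean'
labels = clusterer.fit_predict(X)   # one label per row of X
```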
(diff for the remaining 86 changed files not shown)
