diff --git a/client/src/Components/ParameterView/ParameterView.css b/client/src/Components/ParameterView/ParameterView.css
index 74df6b29d..b3a5dfd67 100644
--- a/client/src/Components/ParameterView/ParameterView.css
+++ b/client/src/Components/ParameterView/ParameterView.css
@@ -3,14 +3,17 @@
position: -webkit-sticky;
position: sticky;
top: 0;
+ overflow-x: hidden;
}
.parameterViewDiv h2{
padding-top: 1rem;
+ padding-left: 1rem;
}
.parameterViewDiv h5{
padding-top: 1rem;
+ padding-left: 1rem;
}
.parameterViewDiv a{
@@ -22,12 +25,22 @@
padding-bottom: .5rem;
}
+.docuTextHidden {
+  overflow: hidden; white-space: nowrap; text-overflow: ellipsis;
+}
+
+.docu-paragraph{
+ display: flex;
+ justify-content: flex-start;
+}
+
.parameter-header {
+ display: flex;
+ justify-content: flex-start;
font-weight: bold;
}
.parameter-name {
- float: left;
margin-right: 15px;
}
@@ -41,15 +54,11 @@
}
.read-more-button {
+ text-align: left;
background: none!important;
border: none;
- /*padding: 0!important;*/
padding-left: 0.5rem;
- /*optional*/
font-family: arial, sans-serif;
- /*input has OS specific font-family*/
- color: #069;
- text-decoration: underline;
cursor: pointer;
}
diff --git a/client/src/Components/TreeView/TreeView.tsx b/client/src/Components/TreeView/TreeView.tsx
index 7a440fceb..ce79a2c1a 100644
--- a/client/src/Components/TreeView/TreeView.tsx
+++ b/client/src/Components/TreeView/TreeView.tsx
@@ -1,7 +1,7 @@
import React from 'react'
import Tree from "../Tree/Tree";
import './tree-view.css';
-import packageJson from "../../sklearn.json";
+import packageJson from "../../data/sklearn_new_schema.json";
import PythonPackageBuilder from "../../model/PythonPackageBuilder";
type TreeViewProps = {
diff --git a/client/src/data/sklearn.json b/client/src/data/sklearn.json
deleted file mode 100644
index 54d099cf6..000000000
--- a/client/src/data/sklearn.json
+++ /dev/null
@@ -1,127935 +0,0 @@
-{
- "name": "sklearn",
- "modules": [
- {
- "name": "sklearn.base",
- "imports": [
- "import copy",
- "import warnings",
- "from collections import defaultdict",
- "import platform",
- "import inspect",
- "import re",
- "import numpy as np",
- "from None import __version__",
- "from _config import get_config",
- "from utils import _IS_32BIT",
- "from utils._tags import _DEFAULT_TAGS",
- "from utils._tags import _safe_tags",
- "from utils.validation import check_X_y",
- "from utils.validation import check_array",
- "from utils._estimator_html_repr import estimator_html_repr",
- "from utils.validation import _deprecate_positional_args",
- "from utils._pprint import _EstimatorPrettyPrinter",
- "from metrics import accuracy_score",
- "from metrics import r2_score"
- ],
- "classes": [
- {
- "name": "BaseEstimator",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "_get_param_names",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get parameter names for the estimator"
- },
- {
- "name": "get_params",
- "decorators": [],
- "parameters": [
- {
- "name": "deep",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get parameters for this estimator.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values."
- },
- {
- "name": "set_params",
- "decorators": [],
- "parameters": [
- {
- "name": "**params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator parameters."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Set the parameters of this estimator.\n\nThe method works on simple estimators as well as on nested objects\n(such as :class:`~sklearn.pipeline.Pipeline`). The latter have\nparameters of the form ``__`` so that it's\npossible to update each component of a nested object.\n\nParameters\n----------\n**params : dict\n Estimator parameters.\n\nReturns\n-------\nself : estimator instance\n Estimator instance."
- },
- {
- "name": "__repr__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "__getstate__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "__setstate__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_n_features",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input samples."
- },
- {
- "name": "reset",
- "type": "bool",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the `n_features_in_` attribute is set to `X.shape[1]`. If False and the attribute exists, then check that it is equal to `X.shape[1]`. If False and the attribute does *not* exist, then the check is skipped. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Set the `n_features_in_` attribute, or check against it.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\nreset : bool\n If True, the `n_features_in_` attribute is set to `X.shape[1]`.\n If False and the attribute exists, then check that it is equal to\n `X.shape[1]`. If False and the attribute does *not* exist, then\n the check is skipped.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`."
- },
- {
- "name": "_validate_data",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input samples."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "'no_validation'",
- "limitation": null,
- "ignored": false,
- "docstring": "The targets. - If `None`, `check_array` is called on `X`. If the estimator's requires_y tag is True, then an error will be raised. - If `'no_validation'`, `check_array` is called on `X` and the estimator's requires_y tag is ignored. This is a default placeholder and is never meant to be explicitly set. - Otherwise, both `X` and `y` are checked with either `check_array` or `check_X_y` depending on `validate_separately`."
- },
- {
- "name": "reset",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to reset the `n_features_in_` attribute. If False, the input will be checked for consistency with data provided when reset was last True. .. note:: It is recommended to call reset=True in `fit` and in the first call to `partial_fit`. All other methods that validate `X` should set `reset=False`."
- },
- {
- "name": "validate_separately",
- "type": "Union[Literal[False], Tuple[]]",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Only used if y is not None. If False, call validate_X_y(). Else, it must be a tuple of kwargs to be used for calling check_array() on X and y respectively."
- },
- {
- "name": "**check_params",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to :func:`sklearn.utils.check_array` or :func:`sklearn.utils.check_X_y`. Ignored if validate_separately is not False."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Validate input data and set or check the `n_features_in_` attribute.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n The input samples.\ny : array-like of shape (n_samples,), default='no_validation'\n The targets.\n\n - If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n - If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set.\n - Otherwise, both `X` and `y` are checked with either `check_array`\n or `check_X_y` depending on `validate_separately`.\n\nreset : bool, default=True\n Whether to reset the `n_features_in_` attribute.\n If False, the input will be checked for consistency with data\n provided when reset was last True.\n .. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`.\nvalidate_separately : False or tuple of dicts, default=False\n Only used if y is not None.\n If False, call validate_X_y(). Else, it must be a tuple of kwargs\n to be used for calling check_array() on X and y respectively.\n**check_params : kwargs\n Parameters passed to :func:`sklearn.utils.check_array` or\n :func:`sklearn.utils.check_X_y`. Ignored if validate_separately\n is not False.\n\nReturns\n-------\nout : {ndarray, sparse matrix} or tuple of these\n The validated input. A tuple is returned if `y` is not None."
- },
- {
- "name": "_repr_html_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "HTML representation of estimator.\n\nThis is redundant with the logic of `_repr_mimebundle_`. The latter\nshould be favorted in the long term, `_repr_html_` is only\nimplemented for consumers who do not interpret `_repr_mimbundle_`."
- },
- {
- "name": "_repr_html_inner",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "This function is returned by the @property `_repr_html_` to make\n`hasattr(estimator, \"_repr_html_\") return `True` or `False` depending\non `get_config()[\"display\"]`."
- },
- {
- "name": "_repr_mimebundle_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Mime bundle used by jupyter kernels to display estimator"
- }
- ],
- "docstring": "Base class for all estimators in scikit-learn.\n\nNotes\n-----\nAll estimators should specify all the parameters that can be set\nat the class level in their ``__init__`` as explicit keyword\narguments (no ``*args`` or ``**kwargs``)."
- },
- {
- "name": "ClassifierMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True labels for `X`."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of ``self.predict(X)`` wrt. `y`."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mixin class for all classifiers in scikit-learn."
- },
- {
- "name": "RegressorMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples. For some estimators this may be a precomputed kernel matrix or a list of generic objects instead with shape ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True values for `X`."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the coefficient of determination :math:`R^2` of the\nprediction.\n\nThe coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,\nwhere :math:`u` is the residual sum of squares ``((y_true - y_pred)\n** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -\ny_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it\ncan be negative (because the model can be arbitrarily worse). A\nconstant model that always predicts the expected value of `y`,\ndisregarding the input features, would get a :math:`R^2` score of\n0.0.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples. For some estimators this may be a precomputed\n kernel matrix or a list of generic objects instead with shape\n ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``\n is the number of samples used in the fitting for the estimator.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for `X`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n :math:`R^2` of ``self.predict(X)`` wrt. `y`.\n\nNotes\n-----\nThe :math:`R^2` score used when calling ``score`` on a regressor uses\n``multioutput='uniform_average'`` from version 0.23 to keep consistent\nwith default value of :func:`~sklearn.metrics.r2_score`.\nThis influences the ``score`` method of all the multioutput\nregressors (except for\n:class:`~sklearn.multioutput.MultiOutputRegressor`)."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mixin class for all regression estimators in scikit-learn."
- },
- {
- "name": "ClusterMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform clustering on `X` and returns cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,), dtype=np.int64\n Cluster labels."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mixin class for all cluster estimators in scikit-learn."
- },
- {
- "name": "BiclusterMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "biclusters_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Convenient way to get row and column indicators together.\n\nReturns the ``rows_`` and ``columns_`` members."
- },
- {
- "name": "get_indices",
- "decorators": [],
- "parameters": [
- {
- "name": "i",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The index of the cluster."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Row and column indices of the `i`'th bicluster.\n\nOnly works if ``rows_`` and ``columns_`` attributes exist.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nrow_ind : ndarray, dtype=np.intp\n Indices of rows in the dataset that belong to the bicluster.\ncol_ind : ndarray, dtype=np.intp\n Indices of columns in the dataset that belong to the bicluster."
- },
- {
- "name": "get_shape",
- "decorators": [],
- "parameters": [
- {
- "name": "i",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The index of the cluster."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Shape of the `i`'th bicluster.\n\nParameters\n----------\ni : int\n The index of the cluster.\n\nReturns\n-------\nn_rows : int\n Number of rows in the bicluster.\n\nn_cols : int\n Number of columns in the bicluster."
- },
- {
- "name": "get_submatrix",
- "decorators": [],
- "parameters": [
- {
- "name": "i",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The index of the cluster."
- },
- {
- "name": "data",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the submatrix corresponding to bicluster `i`.\n\nParameters\n----------\ni : int\n The index of the cluster.\ndata : array-like of shape (n_samples, n_features)\n The data.\n\nReturns\n-------\nsubmatrix : ndarray of shape (n_rows, n_cols)\n The submatrix corresponding to bicluster `i`.\n\nNotes\n-----\nWorks with sparse matrices. Only works if ``rows_`` and\n``columns_`` attributes exist."
- }
- ],
- "docstring": "Mixin class for all bicluster estimators in scikit-learn."
- },
- {
- "name": "TransformerMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input samples."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values (None for unsupervised transformations)."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional fit parameters."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit to data, then transform it.\n\nFits transformer to `X` and `y` with optional parameters `fit_params`\nand returns a transformed version of `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None\n Target values (None for unsupervised transformations).\n\n**fit_params : dict\n Additional fit parameters.\n\nReturns\n-------\nX_new : ndarray array of shape (n_samples, n_features_new)\n Transformed array."
- }
- ],
- "docstring": "Mixin class for all transformers in scikit-learn."
- },
- {
- "name": "DensityMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the score of the model on the data `X`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nscore : float"
- }
- ],
- "docstring": "Mixin class for all density estimators in scikit-learn."
- },
- {
- "name": "OutlierMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform fit on X and returns labels for X.\n\nReturns -1 for outliers and 1 for inliers.\n\nParameters\n----------\nX : {array-like, sparse matrix, dataframe} of shape (n_samples, n_features)\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\ny : ndarray of shape (n_samples,)\n 1 for inliers, -1 for outliers."
- }
- ],
- "docstring": "Mixin class for all outlier detection estimators in scikit-learn."
- },
- {
- "name": "MetaEstimatorMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": null
- },
- {
- "name": "MultiOutputMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mixin to mark estimators that support multioutput."
- },
- {
- "name": "_UnstableArchMixin",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mark estimators that are non-determinstic on 32bit or PowerPC"
- }
- ],
- "functions": [
- {
- "name": "clone",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": "Union[List, Tuple[], Set]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The estimator or group of estimators to be cloned."
- },
- {
- "name": "safe",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If safe is False, clone will fall back to a deep copy on objects that are not estimators."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Constructs a new unfitted estimator with the same parameters.\n\nClone does a deep copy of the model in an estimator\nwithout actually copying attached data. It yields a new estimator\nwith the same parameters that has not been fitted on any data.\n\nIf the estimator's `random_state` parameter is an integer (or if the\nestimator doesn't have a `random_state` parameter), an *exact clone* is\nreturned: the clone and the original estimator will give the exact same\nresults. Otherwise, *statistical clone* is returned: the clone might\nyield different results from the original estimator. More details can be\nfound in :ref:`randomness`.\n\nParameters\n----------\nestimator : {list, tuple, set} of estimator instance or a single estimator instance\n The estimator or group of estimators to be cloned.\n\nsafe : bool, default=True\n If safe is False, clone will fall back to a deep copy on objects\n that are not estimators."
- },
- {
- "name": "_pprint",
- "decorators": [],
- "parameters": [
- {
- "name": "params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The dictionary to pretty print"
- },
- {
- "name": "offset",
- "type": "int",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The offset in characters to add at the begin of each line."
- },
- {
- "name": "printer",
- "type": "Callable",
- "hasDefault": true,
- "default": "repr",
- "limitation": null,
- "ignored": false,
- "docstring": "The function to convert entries to strings, typically the builtin str or repr"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Pretty print the dictionary 'params'\n\nParameters\n----------\nparams : dict\n The dictionary to pretty print\n\noffset : int, default=0\n The offset in characters to add at the begin of each line.\n\nprinter : callable, default=repr\n The function to convert entries to strings, typically\n the builtin str or repr"
- },
- {
- "name": "is_classifier",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator object to test."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return True if the given estimator is (probably) a classifier.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a classifier and False otherwise."
- },
- {
- "name": "is_regressor",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator object to test."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return True if the given estimator is (probably) a regressor.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is a regressor and False otherwise."
- },
- {
- "name": "is_outlier_detector",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator object to test."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return True if the given estimator is (probably) an outlier detector.\n\nParameters\n----------\nestimator : estimator instance\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if estimator is an outlier detector and False otherwise."
- },
- {
- "name": "_is_pairwise",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator object to test."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns True if estimator is pairwise.\n\n- If the `_pairwise` attribute and the tag are present and consistent,\n then use the value and not issue a warning.\n- If the `_pairwise` attribute and the tag are present and not\n consistent, use the `_pairwise` value and issue a deprecation\n warning.\n- If only the `_pairwise` attribute is present and it is not False,\n issue a deprecation warning and use the `_pairwise` value.\n\nParameters\n----------\nestimator : object\n Estimator object to test.\n\nReturns\n-------\nout : bool\n True if the estimator is pairwise and False otherwise."
- }
- ]
- },
- {
- "name": "sklearn.calibration",
- "imports": [
- "import warnings",
- "from inspect import signature",
- "from contextlib import suppress",
- "from functools import partial",
- "from math import log",
- "import numpy as np",
- "from joblib import Parallel",
- "from scipy.special import expit",
- "from scipy.special import xlogy",
- "from scipy.optimize import fmin_bfgs",
- "from base import BaseEstimator",
- "from base import ClassifierMixin",
- "from base import RegressorMixin",
- "from base import clone",
- "from base import MetaEstimatorMixin",
- "from preprocessing import label_binarize",
- "from preprocessing import LabelEncoder",
- "from utils import check_array",
- "from utils import column_or_1d",
- "from utils import deprecated",
- "from utils import indexable",
- "from utils.multiclass import check_classification_targets",
- "from utils.fixes import delayed",
- "from utils.validation import check_is_fitted",
- "from utils.validation import check_consistent_length",
- "from utils.validation import _check_sample_weight",
- "from pipeline import Pipeline",
- "from isotonic import IsotonicRegression",
- "from svm import LinearSVC",
- "from model_selection import check_cv",
- "from model_selection import cross_val_predict",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "CalibratedClassifierCV",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "base_estimator",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The classifier whose output need to be calibrated to provide more accurate `predict_proba` outputs. The default classifier is a :class:`~sklearn.svm.LinearSVC`."
- },
- {
- "name": "method",
- "type": "Literal['sigmoid', 'isotonic']",
- "hasDefault": true,
- "default": "'sigmoid'",
- "limitation": null,
- "ignored": false,
- "docstring": "The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method (i.e. a logistic regression model) or 'isotonic' which is a non-parametric approach. It is not advised to use isotonic calibration with too few calibration samples ``(<<1000)`` since it tends to overfit."
- },
- {
- "name": "cv",
- "type": "Union[Literal[\"prefit\"], int]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if ``y`` is binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` is used. Refer to the :ref:`User Guide ` for the various cross-validation strategies that can be used here. If \"prefit\" is passed, it is assumed that `base_estimator` has been fitted already and all data is used for calibration. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. Base estimator clones are fitted in parallel across cross-validation iterations. Therefore parallelism happens only when `cv != \"prefit\"`. See :term:`Glossary ` for more details. .. versionadded:: 0.24"
- },
- {
- "name": "ensemble",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines how the calibrator is fitted when `cv` is not `'prefit'`. Ignored if `cv='prefit'`. If `True`, the `base_estimator` is fitted using training data and calibrated using testing data, for each `cv` fold. The final estimator is an ensemble of `n_cv` fitted classifer and calibrator pairs, where `n_cv` is the number of cross-validation folds. The output is the average predicted probabilities of all pairs. If `False`, `cv` is used to compute unbiased predictions, via :func:`~sklearn.model_selection.cross_val_predict`, which are then used for calibration. At prediction time, the classifier used is the `base_estimator` trained on all the data. Note that this method is also internally implemented in :mod:`sklearn.svm` estimators with the `probabilities=True` parameter. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the calibrated model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self."
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The samples."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calibrated probabilities of classification.\n\nThis function returns calibrated probabilities of classification\naccording to each class on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n The predicted probas."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The samples."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the target of new samples. The predicted class is the\nclass that has the highest probability, and can thus be different\nfrom the prediction of the uncalibrated classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The samples.\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n The predicted class."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the method implemented when `probabilities=True`\nfor :mod:`sklearn.svm` estimators.\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`base_estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbase_estimator : estimator instance, default=None\n The classifier whose output need to be calibrated to provide more\n accurate `predict_proba` outputs. The default classifier is\n a :class:`~sklearn.svm.LinearSVC`.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method (i.e. a logistic regression model) or\n 'isotonic' which is a non-parametric approach. 
It is not advised to\n use isotonic calibration with too few calibration samples\n ``(<<1000)`` since it tends to overfit.\n\ncv : int, cross-validation generator, iterable or \"prefit\", default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if ``y`` is binary or multiclass,\n :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\n neither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\n is used.\n\n Refer to the :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n If \"prefit\" is passed, it is assumed that `base_estimator` has been\n fitted already and all data is used for calibration.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors.\n\n Base estimator clones are fitted in parallel across cross-validation\n iterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\n See :term:`Glossary ` for more details.\n\n .. versionadded:: 0.24\n\nensemble : bool, default=True\n Determines how the calibrator is fitted when `cv` is not `'prefit'`.\n Ignored if `cv='prefit'`.\n\n If `True`, the `base_estimator` is fitted using training data and\n calibrated using testing data, for each `cv` fold. The final estimator\n is an ensemble of `n_cv` fitted classifer and calibrator pairs, where\n `n_cv` is the number of cross-validation folds. 
The output is the\n average predicted probabilities of all pairs.\n\n If `False`, `cv` is used to compute unbiased predictions, via\n :func:`~sklearn.model_selection.cross_val_predict`, which are then\n used for calibration. At prediction time, the classifier used is the\n `base_estimator` trained on all the data.\n Note that this method is also internally implemented in\n :mod:`sklearn.svm` estimators with the `probabilities=True` parameter.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n The class labels.\n\ncalibrated_classifiers_ : list (len() equal to cv or 1 if `cv=\"prefit\"` or `ensemble=False`)\n The list of classifier and calibrator pairs.\n\n - When `cv=\"prefit\"`, the fitted `base_estimator` and fitted\n calibrator.\n - When `cv` is not \"prefit\" and `ensemble=True`, `n_cv` fitted\n `base_estimator` and calibrator pairs. `n_cv` is the number of\n cross-validation folds.\n - When `cv` is not \"prefit\" and `ensemble=False`, the `base_estimator`,\n fitted on all the data, and fitted calibrator.\n\n .. versionchanged:: 0.24\n Single calibrated classifier case when `ensemble=False`.\n\nExamples\n--------\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.calibration import CalibratedClassifierCV\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... n_redundant=0, random_state=42)\n>>> base_clf = GaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(base_estimator=base_clf, cv=3)\n>>> calibrated_clf.fit(X, y)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv=3)\n>>> len(calibrated_clf.calibrated_classifiers_)\n3\n>>> calibrated_clf.predict_proba(X)[:5, :]\narray([[0.110..., 0.889...],\n [0.072..., 0.927...],\n [0.928..., 0.071...],\n [0.928..., 0.071...],\n [0.071..., 0.928...]])\n\n>>> from sklearn.model_selection import train_test_split\n>>> X, y = make_classification(n_samples=100, n_features=2,\n... 
n_redundant=0, random_state=42)\n>>> X_train, X_calib, y_train, y_calib = train_test_split(\n... X, y, random_state=42\n... )\n>>> base_clf = GaussianNB()\n>>> base_clf.fit(X_train, y_train)\nGaussianNB()\n>>> calibrated_clf = CalibratedClassifierCV(\n... base_estimator=base_clf,\n... cv=\"prefit\"\n... )\n>>> calibrated_clf.fit(X_calib, y_calib)\nCalibratedClassifierCV(base_estimator=GaussianNB(), cv='prefit')\n>>> len(calibrated_clf.calibrated_classifiers_)\n1\n>>> calibrated_clf.predict_proba([[-0.5, 0.5]])\narray([[0.936..., 0.063...]])\n\nReferences\n----------\n.. [1] Obtaining calibrated probability estimates from decision trees\n and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001\n\n.. [2] Transforming Classifier Scores into Accurate Multiclass\n Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002)\n\n.. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to\n Regularized Likelihood Methods, J. Platt, (1999)\n\n.. [4] Predicting Good Probabilities with Supervised Learning,\n A. Niculescu-Mizil & R. Caruana, ICML 2005"
- },
- {
- "name": "_CalibratedClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "base_estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Fitted classifier."
- },
- {
- "name": "calibrators",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of fitted calibrators (either 'IsotonicRegression' or '_SigmoidCalibration'). The number of calibrators equals the number of classes. However, if there are 2 classes, the list contains only one fitted calibrator."
- },
- {
- "name": "classes",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "All the prediction classes."
- },
- {
- "name": "method",
- "type": "Literal['sigmoid', 'isotonic']",
- "hasDefault": true,
- "default": "'sigmoid'",
- "limitation": null,
- "ignored": false,
- "docstring": "The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method or 'isotonic' which is a non-parametric approach based on isotonic regression."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "calibrators_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The sample data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculate calibrated probabilities.\n\nCalculates classification calibrated probabilities\nfor each class, in a one-vs-all manner, for `X`.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n The sample data.\n\nReturns\n-------\nproba : array, shape (n_samples, n_classes)\n The predicted probabilities. Can be exact zeros."
- }
- ],
- "docstring": "Pipeline-like chaining a fitted classifier and its fitted calibrators.\n\nParameters\n----------\nbase_estimator : estimator instance\n Fitted classifier.\n\ncalibrators : list of fitted estimator instances\n List of fitted calibrators (either 'IsotonicRegression' or\n '_SigmoidCalibration'). The number of calibrators equals the number of\n classes. However, if there are 2 classes, the list contains only one\n fitted calibrator.\n\nclasses : array-like of shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}, default='sigmoid'\n The method to use for calibration. Can be 'sigmoid' which\n corresponds to Platt's method or 'isotonic' which is a\n non-parametric approach based on isotonic regression.\n\nAttributes\n----------\ncalibrators_ : list of fitted estimator instances\n Same as `calibrators`. Exposed for backward-compatibility. Use\n `calibrators` instead.\n\n .. deprecated:: 0.24\n `calibrators_` is deprecated from 0.24 and will be removed in\n 1.1 (renaming of 0.26). Use `calibrators` instead."
- },
- {
- "name": "_SigmoidCalibration",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training target."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\nself : object\n Returns an instance of self."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "T",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict from."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,)\n Data to predict from.\n\nReturns\n-------\nT_ : ndarray of shape (n_samples,)\n The predicted data."
- }
- ],
- "docstring": "Sigmoid regression model.\n\nAttributes\n----------\na_ : float\n The slope.\n\nb_ : float\n The intercept."
- }
- ],
- "functions": [
- {
- "name": "_fit_classifier_calibrator_pair",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Cloned base estimator."
- },
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets."
- },
- {
- "name": "train",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Indices of the training subset."
- },
- {
- "name": "test",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Indices of the testing subset."
- },
- {
- "name": "supports_sw",
- "type": "bool",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not the `estimator` supports sample weights."
- },
- {
- "name": "method",
- "type": "Literal['sigmoid', 'isotonic']",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Method to use for calibration."
- },
- {
- "name": "classes",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The target classes."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights for `X`."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit a classifier/calibration pair on a given train/test split.\n\nFit the classifier on the train set, compute its predictions on the test\nset and use the predictions as input to fit the calibrator along with the\ntest labels.\n\nParameters\n----------\nestimator : estimator instance\n Cloned base estimator.\n\nX : array-like, shape (n_samples, n_features)\n Sample data.\n\ny : array-like, shape (n_samples,)\n Targets.\n\ntrain : ndarray, shape (n_train_indicies,)\n Indices of the training subset.\n\ntest : ndarray, shape (n_test_indicies,)\n Indices of the testing subset.\n\nsupports_sw : bool\n Whether or not the `estimator` supports sample weights.\n\nmethod : {'sigmoid', 'isotonic'}\n Method to use for calibration.\n\nclasses : ndarray, shape (n_classes,)\n The target classes.\n\nsample_weight : array-like, default=None\n Sample weights for `X`.\n\nReturns\n-------\ncalibrated_classifier : _CalibratedClassifier instance"
- },
- {
- "name": "_get_prediction_method",
- "decorators": [],
- "parameters": [
- {
- "name": "clf",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Fitted classifier to obtain the prediction method from."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return prediction method.\n\n`decision_function` method of `clf` returned, if it\nexists, otherwise `predict_proba` method returned.\n\nParameters\n----------\nclf : Estimator instance\n Fitted classifier to obtain the prediction method from.\n\nReturns\n-------\nprediction_method : callable\n The prediction method."
- },
- {
- "name": "_compute_predictions",
- "decorators": [],
- "parameters": [
- {
- "name": "pred_method",
- "type": "Callable",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prediction method."
- },
- {
- "name": "X",
- "type": "Optional[ArrayLike]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data used to obtain predictions."
- },
- {
- "name": "n_classes",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of classes present."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return predictions for `X` and reshape binary outputs to shape\n(n_samples, 1).\n\nParameters\n----------\npred_method : callable\n Prediction method.\n\nX : array-like or None\n Data used to obtain predictions.\n\nn_classes : int\n Number of classes present.\n\nReturns\n-------\npredictions : array-like, shape (X.shape[0], len(clf.classes_))\n The predictions. Note if there are 2 classes, array is of shape\n (X.shape[0], 1)."
- },
- {
- "name": "_fit_calibrator",
- "decorators": [],
- "parameters": [
- {
- "name": "clf",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Fitted classifier."
- },
- {
- "name": "predictions",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Raw predictions returned by the un-calibrated base classifier."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The targets."
- },
- {
- "name": "classes",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "All the prediction classes."
- },
- {
- "name": "method",
- "type": "Literal['sigmoid', 'isotonic']",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The method to use for calibration."
- },
- {
- "name": "sample_weight",
- "type": "NDArray",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit calibrator(s) and return a `_CalibratedClassifier`\ninstance.\n\n`n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted.\nHowever, if `n_classes` equals 2, one calibrator is fitted.\n\nParameters\n----------\nclf : estimator instance\n Fitted classifier.\n\npredictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) when binary.\n Raw predictions returned by the un-calibrated base classifier.\n\ny : array-like, shape (n_samples,)\n The targets.\n\nclasses : ndarray, shape (n_classes,)\n All the prediction classes.\n\nmethod : {'sigmoid', 'isotonic'}\n The method to use for calibration.\n\nsample_weight : ndarray, shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\npipeline : _CalibratedClassifier instance"
- },
- {
- "name": "_sigmoid_calibration",
- "decorators": [],
- "parameters": [
- {
- "name": "predictions",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The decision function or predict proba for the samples."
- },
- {
- "name": "y",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The targets."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Probability Calibration with sigmoid method (Platt 2000)\n\nParameters\n----------\npredictions : ndarray of shape (n_samples,)\n The decision function or predict proba for the samples.\n\ny : ndarray of shape (n_samples,)\n The targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n\nReturns\n-------\na : float\n The slope.\n\nb : float\n The intercept.\n\nReferences\n----------\nPlatt, \"Probabilistic Outputs for Support Vector Machines\""
- },
- {
- "name": "calibration_curve",
- "decorators": [],
- "parameters": [
- {
- "name": "y_true",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True targets."
- },
- {
- "name": "y_prob",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Probabilities of the positive class."
- },
- {
- "name": "normalize",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e. is not a proper probability. If True, the smallest value in y_prob is linearly mapped onto 0 and the largest one onto 1."
- },
- {
- "name": "n_bins",
- "type": "int",
- "hasDefault": true,
- "default": "5",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of bins to discretize the [0, 1] interval. A bigger number requires more data. Bins with no samples (i.e. without corresponding values in `y_prob`) will not be returned, thus the returned arrays may have less than `n_bins` values."
- },
- {
- "name": "strategy",
- "type": "Literal['uniform', 'quantile']",
- "hasDefault": true,
- "default": "'uniform'",
- "limitation": null,
- "ignored": false,
- "docstring": "Strategy used to define the widths of the bins. uniform The bins have identical widths. quantile The bins have the same number of samples and depend on `y_prob`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute true and predicted probabilities for a calibration curve.\n\nThe method assumes the inputs come from a binary classifier, and\ndiscretize the [0, 1] interval into bins.\n\nCalibration curves may also be referred to as reliability diagrams.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,)\n True targets.\n\ny_prob : array-like of shape (n_samples,)\n Probabilities of the positive class.\n\nnormalize : bool, default=False\n Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\n is not a proper probability. If True, the smallest value in y_prob\n is linearly mapped onto 0 and the largest one onto 1.\n\nn_bins : int, default=5\n Number of bins to discretize the [0, 1] interval. A bigger number\n requires more data. Bins with no samples (i.e. without\n corresponding values in `y_prob`) will not be returned, thus the\n returned arrays may have less than `n_bins` values.\n\nstrategy : {'uniform', 'quantile'}, default='uniform'\n Strategy used to define the widths of the bins.\n\n uniform\n The bins have identical widths.\n quantile\n The bins have the same number of samples and depend on `y_prob`.\n\nReturns\n-------\nprob_true : ndarray of shape (n_bins,) or smaller\n The proportion of samples whose class is the positive class, in each\n bin (fraction of positives).\n\nprob_pred : ndarray of shape (n_bins,) or smaller\n The mean predicted probability in each bin.\n\nReferences\n----------\nAlexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good\nProbabilities With Supervised Learning, in Proceedings of the 22nd\nInternational Conference on Machine Learning (ICML).\nSee section 4 (Qualitative Analysis of Predictions).\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.calibration import calibration_curve\n>>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1])\n>>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.])\n>>> prob_true, prob_pred = 
calibration_curve(y_true, y_pred, n_bins=3)\n>>> prob_true\narray([0. , 0.5, 1. ])\n>>> prob_pred\narray([0.2 , 0.525, 0.85 ])"
- }
- ]
- },
- {
- "name": "sklearn.conftest",
- "imports": [
- "import os",
- "from os import environ",
- "from functools import wraps",
- "import pytest",
- "from threadpoolctl import threadpool_limits",
- "from sklearn.utils._openmp_helpers import _openmp_effective_n_threads",
- "from sklearn.datasets import fetch_20newsgroups",
- "from sklearn.datasets import fetch_20newsgroups_vectorized",
- "from sklearn.datasets import fetch_california_housing",
- "from sklearn.datasets import fetch_covtype",
- "from sklearn.datasets import fetch_kddcup99",
- "from sklearn.datasets import fetch_olivetti_faces",
- "from sklearn.datasets import fetch_rcv1"
- ],
- "classes": [],
- "functions": [
- {
- "name": "_fetch_fixture",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fetch dataset (download if missing and requested by environment)."
- },
- {
- "name": "pytest_collection_modifyitems",
- "decorators": [],
- "parameters": [
- {
- "name": "config",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- },
- {
- "name": "items",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Called after collect is completed.\n\nParameters\n----------\nconfig : pytest config\nitems : list of collected items"
- },
- {
- "name": "pyplot",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Setup and teardown fixture for matplotlib.\n\nThis fixture checks if we can import matplotlib. If not, the tests will be\nskipped. Otherwise, we setup matplotlib backend and close the figures\nafter running the functions.\n\nReturns\n-------\npyplot : module\n The ``matplotlib.pyplot`` module."
- },
- {
- "name": "pytest_runtest_setup",
- "decorators": [],
- "parameters": [
- {
- "name": "item",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "item to be processed"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Set the number of openmp threads based on the number of workers\nxdist is using to prevent oversubscription.\n\nParameters\n----------\nitem : pytest item\n item to be processed"
- }
- ]
- },
- {
- "name": "sklearn.discriminant_analysis",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from scipy import linalg",
- "from scipy.special import expit",
- "from base import BaseEstimator",
- "from base import TransformerMixin",
- "from base import ClassifierMixin",
- "from linear_model._base import LinearClassifierMixin",
- "from covariance import ledoit_wolf",
- "from covariance import empirical_covariance",
- "from covariance import shrunk_covariance",
- "from utils.multiclass import unique_labels",
- "from utils import check_array",
- "from utils.validation import check_is_fitted",
- "from utils.multiclass import check_classification_targets",
- "from utils.extmath import softmax",
- "from preprocessing import StandardScaler",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "LinearDiscriminantAnalysis",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "solver",
- "type": "Literal['svd', 'lsqr', 'eigen']",
- "hasDefault": true,
- "default": "'svd'",
- "limitation": null,
- "ignored": false,
- "docstring": "Solver to use, possible values: - 'svd': Singular value decomposition (default). Does not compute the covariance matrix, therefore this solver is recommended for data with a large number of features. - 'lsqr': Least squares solution. Can be combined with shrinkage or custom covariance estimator. - 'eigen': Eigenvalue decomposition. Can be combined with shrinkage or custom covariance estimator."
- },
- {
- "name": "shrinkage",
- "type": "Union[Literal['auto'], float]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. This should be left to None if `covariance_estimator` is used. Note that shrinkage works only with 'lsqr' and 'eigen' solvers."
- },
- {
- "name": "priors",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The class prior probabilities. By default, the class proportions are inferred from the training data."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of components (<= min(n_classes - 1, n_features)) for dimensionality reduction. If None, will be set to min(n_classes - 1, n_features). This parameter only affects the `transform` method."
- },
- {
- "name": "store_covariance",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, explicitely compute the weighted within-class covariance matrix when solver is 'svd'. The matrix is always computed and stored for the other solvers. .. versionadded:: 0.17"
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Absolute threshold for a singular value of X to be considered significant, used to estimate the rank of X. Dimensions whose singular values are non-significant are discarded. Only used if solver is 'svd'. .. versionadded:: 0.17"
- },
- {
- "name": "covariance_estimator",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying on the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in :mod:`sklearn.covariance`. if None the shrinkage parameter drives the estimate. This should be left to None if `shrinkage` is used. Note that `covariance_estimator` works only with 'lsqr' and 'eigen' solvers. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_solve_lsqr",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "shrinkage",
- "type": "Optional[Union[Literal['auto'], float]]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` i not None"
- },
- {
- "name": "covariance_estimator",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Least squares solver.\n\nThe least squares solver computes a straightforward solution of the\noptimal decision rule based directly on the discriminant functions. It\ncan only be used for classification (with any covariance estimator),\nbecause\nestimation of eigenvectors is not performed. Therefore, dimensionality\nreduction with the transform is not supported.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_classes)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 2.6.2, pp. 39-41.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3."
- },
- {
- "name": "_solve_eigen",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "shrinkage",
- "type": "Optional[Union[Literal['auto'], float]]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Shrinkage parameter, possible values: - None: no shrinkage. - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage constant. Shrinkage parameter is ignored if `covariance_estimator` i not None"
- },
- {
- "name": "covariance_estimator",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. if None the shrinkage parameter drives the estimate. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Eigenvalue solver.\n\nThe eigenvalue solver computes the optimal solution of the Rayleigh\ncoefficient (basically the ratio of between class scatter to within\nclass scatter). This solver supports both classification and\ndimensionality reduction (with any covariance estimator).\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nshrinkage : 'auto', float or None\n Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\n Shrinkage parameter is ignored if `covariance_estimator` i\n not None\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nNotes\n-----\nThis solver is based on [1]_, section 3.8.3, pp. 121-124.\n\nReferences\n----------\n.. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n 0-471-05669-3."
- },
- {
- "name": "_solve_svd",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "SVD solver.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit LinearDiscriminantAnalysis model according to the given\n training data and parameters.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Project data to maximize class separation.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_components)\n Transformed data."
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimate probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated probabilities."
- },
- {
- "name": "predict_log_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimate log probability.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Estimated log probabilities."
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Array of samples (test vectors)."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class."
- }
- ],
- "docstring": "Linear Discriminant Analysis\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\nn_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\nstore_covariance : bool, default=False\n If True, explicitely compute the weighted within-class covariance\n matrix when solver is 'svd'. 
The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. 
Only available when eigen\n or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]"
- },
- {
- "name": "QuadraticDiscriminantAnalysis",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "priors",
- "type": "NDArray",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Class priors. By default, the class proportions are inferred from the training data."
- },
- {
- "name": "reg_param",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Regularizes the per-class covariance estimates by transforming S2 as ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``, where S2 corresponds to the `scaling_` attribute of a given class."
- },
- {
- "name": "store_covariance",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the class covariance matrices are explicitely computed and stored in the `self.covariance_` attribute. .. versionadded:: 0.17"
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Absolute threshold for a singular value to be considered significant, used to estimate the rank of `Xk` where `Xk` is the centered matrix of samples in class k. This parameter does not affect the predictions. It only controls a warning that is raised when features are considered to be colinear. .. versionadded:: 0.17"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values (integers)"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers)"
- },
- {
- "name": "_decision_function",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Array of samples (test vectors)."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply decision function to an array of samples.\n\nThe decision function is equal (up to a constant factor) to the\nlog-posterior of the model, i.e. `log p(y = k | x)`. In a binary\nclassification setting this instead corresponds to the difference\n`log p(y = 1 | x) - log p(y = 0 | x)`. See :ref:`lda_qda_math`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples (test vectors).\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Decision function values related to each class, per sample.\n In the two-class case, the shape is (n_samples,), giving the\n log likelihood ratio of the positive class."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform classification on an array of test vectors X.\n\nThe predicted class C for each sample in X is returned.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)"
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Array of samples/test vectors."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior probabilities of classification per class."
- },
- {
- "name": "predict_log_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Array of samples/test vectors."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return log of posterior probabilities of classification.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Array of samples/test vectors.\n\nReturns\n-------\nC : ndarray of shape (n_samples, n_classes)\n Posterior log-probabilities of classification per class."
- }
- ],
- "docstring": "Quadratic Discriminant Analysis\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n Class priors. By default, the class proportions are inferred from the\n training data.\n\nreg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n If True, the class covariance matrices are explicitely computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be colinear.\n\n .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. The estimations are unbiased. Only present if\n `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. its\n principal axis. 
It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n Unique class labels.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis."
- }
- ],
- "functions": [
- {
- "name": "_cov",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n Estimated covariance matrix."
- },
- {
- "name": "_class_means",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute class means.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nReturns\n-------\nmeans : array-like of shape (n_classes, n_features)\n Class means."
- },
- {
- "name": "_class_cov",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "priors",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Class priors."
- },
- {
- "name": "shrinkage",
- "type": "Union[Literal['auto'], float]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Shrinkage parameter, possible values: - None: no shrinkage (default). - 'auto': automatic shrinkage using the Ledoit-Wolf lemma. - float between 0 and 1: fixed shrinkage parameter. Shrinkage parameter is ignored if `covariance_estimator` is not None."
- },
- {
- "name": "covariance_estimator",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, `covariance_estimator` is used to estimate the covariance matrices instead of relying the empirical covariance estimator (with potential shrinkage). The object should have a fit method and a ``covariance_`` attribute like the estimators in sklearn.covariance. If None, the shrinkage parameter drives the estimate. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute weighted within-class covariance matrix.\n\nThe per-class covariance are weighted by the class priors.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\npriors : array-like of shape (n_classes,)\n Class priors.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator` is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in sklearn.covariance.\n If None, the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ncov : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix"
- }
- ]
- },
- {
- "name": "sklearn.dummy",
- "imports": [
- "import warnings",
- "import numpy as np",
- "import scipy.sparse as sp",
- "from base import BaseEstimator",
- "from base import ClassifierMixin",
- "from base import RegressorMixin",
- "from base import MultiOutputMixin",
- "from utils import check_random_state",
- "from utils.validation import _num_samples",
- "from utils.validation import check_array",
- "from utils.validation import check_consistent_length",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _check_sample_weight",
- "from utils.random import _random_choice_csc",
- "from utils.stats import _weighted_percentile",
- "from utils.multiclass import class_distribution",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "DummyClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "strategy",
- "type": "Literal[\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"]",
- "hasDefault": true,
- "default": "\"prior\"",
- "limitation": null,
- "ignored": false,
- "docstring": "Strategy to use to generate predictions. * \"stratified\": generates predictions by respecting the training set's class distribution. * \"most_frequent\": always predicts the most frequent label in the training set. * \"prior\": always predicts the class that maximizes the class prior (like \"most_frequent\") and ``predict_proba`` returns the class prior. * \"uniform\": generates predictions uniformly at random. * \"constant\": always predicts a constant label that is provided by the user. This is useful for metrics that evaluate a non-majority class .. versionchanged:: 0.24 The default value of `strategy` has changed to \"prior\" in version 0.24."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the randomness to generate the predictions when ``strategy='stratified'`` or ``strategy='uniform'``. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "constant",
- "type": "Union[str, int, ArrayLike]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the random classifier.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X."
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return probability estimates for the test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the probability of the sample for each class in\n the model, where classes are ordered arithmetically, for each\n output."
- },
- {
- "name": "predict_log_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, requires length = n_samples"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return log probability estimates for the test vectors X.\n\nParameters\n----------\nX : {array-like, object with finite length or shape}\n Training data, requires length = n_samples\n\nReturns\n-------\nP : ndarray of shape (n_samples, n_classes) or list of such arrays\n Returns the log probability of the sample for each class in\n the model, where classes are ordered arithmetically for each\n output."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "Optional[ArrayLike]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyClassifier operates independently of the sampled observations."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True labels for X."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyClassifier\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) wrt. y."
- }
- ],
- "docstring": "DummyClassifier is a classifier that makes predictions using simple rules.\n\nThis classifier is useful as a simple baseline to compare with other\n(real) classifiers. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"stratified\", \"most_frequent\", \"prior\", \"uniform\", \"constant\"}, default=\"prior\"\n Strategy to use to generate predictions.\n\n * \"stratified\": generates predictions by respecting the training\n set's class distribution.\n * \"most_frequent\": always predicts the most frequent label in the\n training set.\n * \"prior\": always predicts the class that maximizes the class prior\n (like \"most_frequent\") and ``predict_proba`` returns the class prior.\n * \"uniform\": generates predictions uniformly at random.\n * \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness to generate the predictions when\n ``strategy='stratified'`` or ``strategy='uniform'``.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nconstant : int or str or array-like of shape (n_outputs,)\n The explicit constant as predicted by the \"constant\" strategy. 
This\n parameter is useful only for the \"constant\" strategy.\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,) or list of such arrays\n Class labels for each output.\n\nn_classes_ : int or list of int\n Number of label for each output.\n\nclass_prior_ : ndarray of shape (n_classes,) or list of such arrays\n Probability of each class for each output.\n\nn_outputs_ : int\n Number of outputs.\n\nsparse_output_ : bool\n True if the array returned from predict is to be in sparse CSC format.\n Is automatically set to True if the input y is passed in sparse format.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyClassifier\n>>> X = np.array([-1, 1, 1, 1])\n>>> y = np.array([0, 1, 1, 1])\n>>> dummy_clf = DummyClassifier(strategy=\"most_frequent\")\n>>> dummy_clf.fit(X, y)\nDummyClassifier(strategy='most_frequent')\n>>> dummy_clf.predict(X)\narray([1, 1, 1, 1])\n>>> dummy_clf.score(X, y)\n0.75"
- },
- {
- "name": "DummyRegressor",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "strategy",
- "type": "Literal[\"mean\", \"median\", \"quantile\", \"constant\"]",
- "hasDefault": true,
- "default": "\"mean\"",
- "limitation": null,
- "ignored": false,
- "docstring": "Strategy to use to generate predictions. * \"mean\": always predicts the mean of the training set * \"median\": always predicts the median of the training set * \"quantile\": always predicts a specified quantile of the training set, provided with the quantile parameter. * \"constant\": always predicts a constant value that is provided by the user."
- },
- {
- "name": "constant",
- "type": "Union[float, int, ArrayLike]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The explicit constant as predicted by the \"constant\" strategy. This parameter is useful only for the \"constant\" strategy."
- },
- {
- "name": "quantile",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The quantile to predict using the \"quantile\" strategy. A quantile of 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the maximum."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the random regressor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test data."
- },
- {
- "name": "return_std",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to return the standard deviation of posterior prediction. All zeros in this case. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform classification on test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test data.\n\nreturn_std : bool, default=False\n Whether to return the standard deviation of posterior prediction.\n All zeros in this case.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Predicted target values for X.\n\ny_std : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Standard deviation of predictive distribution of query points."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "Optional[ArrayLike]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples. Passing None as test samples gives the same result as passing real test samples, since DummyRegressor operates independently of the sampled observations."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True values for X."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the coefficient of determination R^2 of the prediction.\n\nThe coefficient R^2 is defined as (1 - u/v), where u is the residual\nsum of squares ((y_true - y_pred) ** 2).sum() and v is the total\nsum of squares ((y_true - y_true.mean()) ** 2).sum().\nThe best possible score is 1.0 and it can be negative (because the\nmodel can be arbitrarily worse). A constant model that always\npredicts the expected value of y, disregarding the input features,\nwould get a R^2 score of 0.0.\n\nParameters\n----------\nX : None or array-like of shape (n_samples, n_features)\n Test samples. Passing None as test samples gives the same result\n as passing real test samples, since DummyRegressor\n operates independently of the sampled observations.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True values for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n R^2 of self.predict(X) wrt. y."
- }
- ],
- "docstring": "DummyRegressor is a regressor that makes predictions using\nsimple rules.\n\nThis regressor is useful as a simple baseline to compare with other\n(real) regressors. Do not use it for real problems.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nstrategy : {\"mean\", \"median\", \"quantile\", \"constant\"}, default=\"mean\"\n Strategy to use to generate predictions.\n\n * \"mean\": always predicts the mean of the training set\n * \"median\": always predicts the median of the training set\n * \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n * \"constant\": always predicts a constant value that is provided by\n the user.\n\nconstant : int or float or array-like of shape (n_outputs,), default=None\n The explicit constant as predicted by the \"constant\" strategy. This\n parameter is useful only for the \"constant\" strategy.\n\nquantile : float in [0.0, 1.0], default=None\n The quantile to predict using the \"quantile\" strategy. A quantile of\n 0.5 corresponds to the median, while 0.0 to the minimum and 1.0 to the\n maximum.\n\nAttributes\n----------\nconstant_ : ndarray of shape (1, n_outputs)\n Mean or median or quantile of the training targets or constant value\n given by the user.\n\nn_outputs_ : int\n Number of outputs.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.dummy import DummyRegressor\n>>> X = np.array([1.0, 2.0, 3.0, 4.0])\n>>> y = np.array([2.0, 3.0, 5.0, 10.0])\n>>> dummy_regr = DummyRegressor(strategy=\"mean\")\n>>> dummy_regr.fit(X, y)\nDummyRegressor()\n>>> dummy_regr.predict(X)\narray([5., 5., 5., 5.])\n>>> dummy_regr.score(X, y)\n0.0"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.exceptions",
- "imports": [
- "from utils.deprecation import deprecated"
- ],
- "classes": [
- {
- "name": "NotFittedError",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Exception class to raise if estimator is used before fitting.\n\nThis class inherits from both ValueError and AttributeError to help with\nexception handling and backward compatibility.\n\nExamples\n--------\n>>> from sklearn.svm import LinearSVC\n>>> from sklearn.exceptions import NotFittedError\n>>> try:\n... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])\n... except NotFittedError as e:\n... print(repr(e))\nNotFittedError(\"This LinearSVC instance is not fitted yet. Call 'fit' with\nappropriate arguments before using this estimator.\"...)\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation."
- },
- {
- "name": "ChangedBehaviorWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning class used to notify the user of any change in the behavior.\n\n.. versionchanged:: 0.18\n Moved from sklearn.base."
- },
- {
- "name": "ConvergenceWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils."
- },
- {
- "name": "DataConversionWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation."
- },
- {
- "name": "DataDimensionalityWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils."
- },
- {
- "name": "EfficiencyWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning used to notify the user of inefficient computation.\n\nThis warning notifies the user that the efficiency may not be optimal due\nto some reason which may be included as a part of the warning message.\nThis may be subclassed into a more specific Warning class.\n\n.. versionadded:: 0.18"
- },
- {
- "name": "FitFailedWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation."
- },
- {
- "name": "NonBLASDotWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning used when the dot operation does not use BLAS.\n\nThis warning is used to notify the user that BLAS was not used for dot\noperation and hence the efficiency may be affected.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation, extends EfficiencyWarning."
- },
- {
- "name": "SkipTestWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning class used to notify the user of a test that was skipped.\n\nFor example, one of the estimator checks requires a pandas import.\nIf the pandas package cannot be imported, the test will be skipped rather\nthan register as a failure."
- },
- {
- "name": "UndefinedMetricWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base."
- },
- {
- "name": "PositiveSpectrumWarning",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "Warning raised when the eigenvalues of a PSD matrix have issues\n\nThis warning is typically raised by ``_check_psd_eigenvalues`` when the\neigenvalues of a positive semidefinite (PSD) matrix such as a gram matrix\n(kernel) present significant negative eigenvalues, or bad conditioning i.e.\nvery small non-zero eigenvalues compared to the largest eigenvalue.\n\n.. versionadded:: 0.22"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.isotonic",
- "imports": [
- "import numpy as np",
- "from scipy import interpolate",
- "from scipy.stats import spearmanr",
- "import warnings",
- "import math",
- "from base import BaseEstimator",
- "from base import TransformerMixin",
- "from base import RegressorMixin",
- "from utils import check_array",
- "from utils import check_consistent_length",
- "from utils.validation import _check_sample_weight",
- "from utils.validation import _deprecate_positional_args",
- "from _isotonic import _inplace_contiguous_isotonic_regression",
- "from _isotonic import _make_unique"
- ],
- "classes": [
- {
- "name": "IsotonicRegression",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "y_min",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf."
- },
- {
- "name": "y_max",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf."
- },
- {
- "name": "increasing",
- "type": "Union[Literal['auto'], bool]",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines whether the predictions should be constrained to increase or decrease with `X`. 'auto' will decide based on the Spearman correlation estimate's sign."
- },
- {
- "name": "out_of_bounds",
- "type": "Literal['nan', 'clip', 'raise']",
- "hasDefault": true,
- "default": "'nan'",
- "limitation": null,
- "ignored": false,
- "docstring": "Handles how `X` values outside of the training domain are handled during prediction. - 'nan', predictions will be NaN. - 'clip', predictions will be set to the value corresponding to the nearest train interval endpoint. - 'raise', a `ValueError` is raised."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_input_data_shape",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_build_f",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Build the f_ interp1d function."
- },
- {
- "name": "_build_y",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Build the y_ IsotonicRegression."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training target."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights. If set to None, all weights will be set to 1 (equal weights)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model using X, y as training data.\n\nParameters\n----------\nX : array-like of shape (n_samples,) or (n_samples, 1)\n Training data.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights. If set to None, all weights will be set to 1 (equal\n weights).\n\nReturns\n-------\nself : object\n Returns an instance of self.\n\nNotes\n-----\nX is stored for future use, as :meth:`transform` needs X to interpolate\nnew input data."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "T",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to transform. .. versionchanged:: 0.24 Also accepts 2d array with 1 feature."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform new data by linear interpolation\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\n .. versionchanged:: 0.24\n Also accepts 2d array with 1 feature.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n The transformed data"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "T",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to transform."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict new data by linear interpolation.\n\nParameters\n----------\nT : array-like of shape (n_samples,) or (n_samples, 1)\n Data to transform.\n\nReturns\n-------\ny_pred : ndarray of shape (n_samples,)\n Transformed data."
- },
- {
- "name": "__getstate__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Pickle-protocol - return state of the estimator. "
- },
- {
- "name": "__setstate__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Pickle-protocol - set state of the estimator.\n\nWe need to rebuild the interpolation function."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool or 'auto', default=True\n Determines whether the predictions should be constrained to increase\n or decrease with `X`. 'auto' will decide based on the Spearman\n correlation estimate's sign.\n\nout_of_bounds : {'nan', 'clip', 'raise'}, default='nan'\n Handles how `X` values outside of the training domain are handled\n during prediction.\n\n - 'nan', predictions will be NaN.\n - 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n - 'raise', a `ValueError` is raised.\n\nAttributes\n----------\nX_min_ : float\n Minimum value of input array `X_` for left bound.\n\nX_max_ : float\n Maximum value of input array `X_` for right bound.\n\nX_thresholds_ : ndarray of shape (n_thresholds,)\n Unique ascending `X` values used to interpolate\n the y = f(X) monotonic function.\n\n .. versionadded:: 0.24\n\ny_thresholds_ : ndarray of shape (n_thresholds,)\n De-duplicated `y` values suitable to interpolate the y = f(X)\n monotonic function.\n\n .. versionadded:: 0.24\n\nf_ : function\n The stepwise interpolating function that covers the input domain ``X``.\n\nincreasing_ : bool\n Inferred value for ``increasing``.\n\nNotes\n-----\nTies are broken using the secondary method from de Leeuw, 1977.\n\nReferences\n----------\nIsotonic Median Regression: A Linear Programming Approach\nNilotpal Chakravarti\nMathematics of Operations Research\nVol. 14, No. 2 (May, 1989), pp. 
303-308\n\nIsotone Optimization in R : Pool-Adjacent-Violators\nAlgorithm (PAVA) and Active Set Methods\nde Leeuw, Hornik, Mair\nJournal of Statistical Software 2009\n\nCorrectness of Kruskal's algorithms for monotone regression with ties\nde Leeuw, Psychometrica, 1977\n\nExamples\n--------\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.isotonic import IsotonicRegression\n>>> X, y = make_regression(n_samples=10, n_features=1, random_state=41)\n>>> iso_reg = IsotonicRegression().fit(X, y)\n>>> iso_reg.predict([.1, .2])\narray([1.8628..., 3.7256...])"
- }
- ],
- "functions": [
- {
- "name": "check_increasing",
- "decorators": [],
- "parameters": [
- {
- "name": "x",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training target."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Determine whether y is monotonically correlated with x.\n\ny is found increasing or decreasing with respect to x based on a Spearman\ncorrelation test.\n\nParameters\n----------\nx : array-like of shape (n_samples,)\n Training data.\n\ny : array-like of shape (n_samples,)\n Training target.\n\nReturns\n-------\nincreasing_bool : boolean\n Whether the relationship is increasing or decreasing.\n\nNotes\n-----\nThe Spearman correlation coefficient is estimated from the data, and the\nsign of the resulting estimate is used as the result.\n\nIn the event that the 95% confidence interval based on Fisher transform\nspans zero, a warning is raised.\n\nReferences\n----------\nFisher transformation. Wikipedia.\nhttps://en.wikipedia.org/wiki/Fisher_transformation"
- },
- {
- "name": "isotonic_regression",
- "decorators": [],
- "parameters": [
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights on each point of the regression. If None, weight is set to 1 (equal weights)."
- },
- {
- "name": "y_min",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Lower bound on the lowest predicted value (the minimum value may still be higher). If not set, defaults to -inf."
- },
- {
- "name": "y_max",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Upper bound on the highest predicted value (the maximum may still be lower). If not set, defaults to +inf."
- },
- {
- "name": "increasing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to compute ``y_`` is increasing (if set to True) or decreasing (if set to False)"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Solve the isotonic regression model.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny : array-like of shape (n_samples,)\n The data.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights on each point of the regression.\n If None, weight is set to 1 (equal weights).\n\ny_min : float, default=None\n Lower bound on the lowest predicted value (the minimum value may\n still be higher). If not set, defaults to -inf.\n\ny_max : float, default=None\n Upper bound on the highest predicted value (the maximum may still be\n lower). If not set, defaults to +inf.\n\nincreasing : bool, default=True\n Whether to compute ``y_`` is increasing (if set to True) or decreasing\n (if set to False)\n\nReturns\n-------\ny_ : list of floats\n Isotonic fit of y.\n\nReferences\n----------\n\"Active set algorithms for isotonic regression; A unifying framework\"\nby Michael J. Best and Nilotpal Chakravarti, section 3."
- }
- ]
- },
- {
- "name": "sklearn.kernel_approximation",
- "imports": [
- "import warnings",
- "import numpy as np",
- "import scipy.sparse as sp",
- "from scipy.linalg import svd",
- "from scipy.fft import fft",
- "from scipy.fft import ifft",
- "from scipy.fftpack import fft",
- "from scipy.fftpack import ifft",
- "from base import BaseEstimator",
- "from base import TransformerMixin",
- "from utils import check_random_state",
- "from utils import as_float_array",
- "from utils.extmath import safe_sparse_dot",
- "from utils.validation import check_is_fitted",
- "from metrics.pairwise import pairwise_kernels",
- "from metrics.pairwise import KERNEL_PARAMS",
- "from utils.validation import check_non_negative",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "PolynomialCountSketch",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "gamma",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter of the polynomial kernel whose feature map will be approximated."
- },
- {
- "name": "degree",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Degree of the polynomial kernel whose feature map will be approximated."
- },
- {
- "name": "coef0",
- "type": "int",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Constant term of the polynomial kernel whose feature map will be approximated."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the output feature space. Usually, n_components should be greater than the number of features in input samples in order to achieve good performance. The optimal score / run time balance is typically achieved around n_components = 10 * n_features, but this depends on the specific dataset being used."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for indexHash and bitHash initialization. Pass an int for reproducible results across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model with X.\n\nInitializes the internal variables. The method needs no information\nabout the distribution of data, so we only care about n_features in X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate the feature map approximation for X.\n\nParameters\n----------\nX : {array-like}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)"
- }
- ],
- "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\ndegree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\ncoef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\nn_components : int, default=100\n Dimensionality of the output feature space. Usually, n_components\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around n_components = 10 * n_features,\n but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. Pass an int for reproducible results across multiple\n function calls. 
See :term:`Glossary `.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0"
- },
- {
- "name": "RBFSampler",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "gamma",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter of RBF kernel: exp(-gamma * x^2)"
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix}, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)"
- }
- ],
- "docstring": "Approximates feature map of an RBF kernel by Monte Carlo approximation\nof its Fourier transform.\n\nIt implements a variant of Random Kitchen Sinks.[1]\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of RBF kernel: exp(-gamma * x^2)\n\nn_components : int, default=100\n Number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_offset_ : ndarray of shape (n_components,), dtype=float64\n Random offset used to compute the projection in the `n_components`\n dimensions of the feature space.\n\nrandom_weights_ : ndarray of shape (n_features, n_components), dtype=float64\n Random projection directions drawn from the Fourier transform\n of the RBF kernel.\n\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import RBFSampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> rbf_feature = RBFSampler(gamma=1, random_state=1)\n>>> X_features = rbf_feature.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=5, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=5)\n>>> clf.score(X_features, y)\n1.0\n\nNotes\n-----\nSee \"Random Features for Large-Scale Kernel Machines\" by A. Rahimi and\nBenjamin Recht.\n\n[1] \"Weighted Sums of Random Kitchen Sinks: Replacing\nminimization with randomization in learning\" by A. Rahimi and\nBenjamin Recht.\n(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf)"
- },
- {
- "name": "SkewedChi2Sampler",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "skewedness",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "\"skewedness\" parameter of the kernel. Needs to be cross-validated."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "number of Monte Carlo samples per original feature. Equals the dimensionality of the computed feature space."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Pseudo-random number generator to control the generation of the random weights and random offset when fitting the training data. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model with X.\n\nSamples random projection according to n_features.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data, where n_samples in the number of samples and n_features is the number of features. All values of X must be strictly greater than \"-skewedness\"."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply the approximate feature map to X.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n New data, where n_samples in the number of samples\n and n_features is the number of features. All values of X must be\n strictly greater than \"-skewedness\".\n\nReturns\n-------\nX_new : array-like, shape (n_samples, n_components)"
- }
- ],
- "docstring": "Approximates feature map of the \"skewed chi-squared\" kernel by Monte\nCarlo approximation of its Fourier transform.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nskewedness : float, default=1.0\n \"skewedness\" parameter of the kernel. Needs to be cross-validated.\n\nn_components : int, default=100\n number of Monte Carlo samples per original feature.\n Equals the dimensionality of the computed feature space.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the generation of the random\n weights and random offset when fitting the training data.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nrandom_weights_ : ndarray of shape (n_features, n_components)\n Weight array, sampled from a secant hyperbolic distribution, which will\n be used to linearly transform the log of the data.\n\nrandom_offset_ : ndarray of shape (n_features, n_components)\n Bias term, which will be added to the data. It is uniformly distributed\n between 0 and 2*pi.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import SkewedChi2Sampler\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> chi2_feature = SkewedChi2Sampler(skewedness=.01,\n... n_components=10,\n... random_state=0)\n>>> X_features = chi2_feature.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0\n\nReferences\n----------\nSee \"Random Fourier Approximations for Skewed Multiplicative Histogram\nKernels\" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu.\n\nSee Also\n--------\nAdditiveChi2Sampler : A different approach for approximating an additive\n variant of the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel."
- },
- {
- "name": "AdditiveChi2Sampler",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "sample_steps",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Gives the number of (complex) sampling points."
- },
- {
- "name": "sample_interval",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sampling interval. Must be specified when sample_steps not in {1,2,3}."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples in the number of samples and n_features is the number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Set the parameters\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features)\n Training data, where n_samples in the number of samples\n and n_features is the number of features.\n\nReturns\n-------\nself : object\n Returns the transformer."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply approximate feature map to X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nX_new : {ndarray, sparse matrix}, shape = (n_samples, n_features * (2*sample_steps + 1))\n Whether the return value is an array of sparse matrix depends on\n the type of the input X."
- },
- {
- "name": "_transform_dense",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_transform_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Approximate feature map for additive chi2 kernel.\n\nUses sampling the fourier transform of the kernel characteristic\nat regular intervals.\n\nSince the kernel that is to be approximated is additive, the components of\nthe input vectors can be treated separately. Each entry in the original\nspace is transformed into 2*sample_steps+1 features, where sample_steps is\na parameter of the method. Typical values of sample_steps include 1, 2 and\n3.\n\nOptimal choices for the sampling interval for certain data ranges can be\ncomputed (see the reference). The default values should be reasonable.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsample_steps : int, default=2\n Gives the number of (complex) sampling points.\nsample_interval : float, default=None\n Sampling interval. Must be specified when sample_steps not in {1,2,3}.\n\nAttributes\n----------\nsample_interval_ : float\n Stored sampling interval. Specified as a parameter if sample_steps not\n in {1,2,3}.\n\nExamples\n--------\n>>> from sklearn.datasets import load_digits\n>>> from sklearn.linear_model import SGDClassifier\n>>> from sklearn.kernel_approximation import AdditiveChi2Sampler\n>>> X, y = load_digits(return_X_y=True)\n>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2)\n>>> X_transformed = chi2sampler.fit_transform(X, y)\n>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)\n>>> clf.fit(X_transformed, y)\nSGDClassifier(max_iter=5, random_state=0)\n>>> clf.score(X_transformed, y)\n0.9499...\n\nNotes\n-----\nThis estimator approximates a slightly different version of the additive\nchi squared kernel then ``metric.additive_chi2`` computes.\n\nSee Also\n--------\nSkewedChi2Sampler : A Fourier-approximation to a non-additive variant of\n the chi squared kernel.\n\nsklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel.\n\nsklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi\n squared kernel.\n\nReferences\n----------\nSee 
`\"Efficient additive kernels via explicit feature maps\"\n`_\nA. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence,\n2011"
- },
- {
- "name": "Nystroem",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "kernel",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'rbf'",
- "limitation": null,
- "ignored": false,
- "docstring": "Kernel map to be approximated. A callable should accept two arguments and the keyword arguments passed to this object as kernel_params, and should return a floating point number."
- },
- {
- "name": "gamma",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels."
- },
- {
- "name": "coef0",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels."
- },
- {
- "name": "degree",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Degree of the polynomial kernel. Ignored by other kernels."
- },
- {
- "name": "kernel_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of features to construct. How many data points will be used to construct the mapping."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Pseudo-random number generator to control the uniform sampling without replacement of n_components of the training data to construct the basis kernel. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation. This works by breaking down the kernel matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit estimator to data.\n\nSamples a subset of training points, computes kernel\non these and computes normalization matrix.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to transform."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply feature map to X.\n\nComputes an approximate feature map using the kernel\nbetween some training points and X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data to transform.\n\nReturns\n-------\nX_transformed : ndarray of shape (n_samples, n_components)\n Transformed data."
- },
- {
- "name": "_get_kernel_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Approximate a kernel map using a subset of the training data.\n\nConstructs an approximate feature map for an arbitrary kernel\nusing a subset of the data as basis.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nkernel : string or callable, default='rbf'\n Kernel map to be approximated. A callable should accept two arguments\n and the keyword arguments passed to this object as kernel_params, and\n should return a floating point number.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ncoef0 : float, default=None\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\ndegree : float, default=None\n Degree of the polynomial kernel. Ignored by other kernels.\n\nkernel_params : dict, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nn_components : int, default=100\n Number of features to construct.\n How many data points will be used to construct the mapping.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo-random number generator to control the uniform sampling without\n replacement of n_components of the training data to construct the basis\n kernel.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the kernel matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.24\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Subset of training points used to construct the feature map.\n\ncomponent_indices_ : ndarray of shape (n_components)\n Indices of ``components_`` in the training set.\n\nnormalization_ : ndarray of shape (n_components, n_components)\n Normalization matrix needed for embedding.\n Square root of the kernel matrix on ``components_``.\n\nExamples\n--------\n>>> from sklearn import datasets, svm\n>>> from sklearn.kernel_approximation import Nystroem\n>>> X, y = datasets.load_digits(n_class=9, return_X_y=True)\n>>> data = X / 16.\n>>> clf = svm.LinearSVC()\n>>> feature_map_nystroem = Nystroem(gamma=.2,\n... random_state=1,\n... n_components=300)\n>>> data_transformed = feature_map_nystroem.fit_transform(data)\n>>> clf.fit(data_transformed, y)\nLinearSVC()\n>>> clf.score(data_transformed, y)\n0.9987...\n\nReferences\n----------\n* Williams, C.K.I. and Seeger, M.\n \"Using the Nystroem method to speed up kernel machines\",\n Advances in neural information processing systems 2001\n\n* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou\n \"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical\n Comparison\",\n Advances in Neural Information Processing Systems 2012\n\n\nSee Also\n--------\nRBFSampler : An approximation to the RBF kernel using random Fourier\n features.\n\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels."
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.kernel_ridge",
- "imports": [
- "import numpy as np",
- "from base import BaseEstimator",
- "from base import RegressorMixin",
- "from base import MultiOutputMixin",
- "from metrics.pairwise import pairwise_kernels",
- "from linear_model._ridge import _solve_cholesky_kernel",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _check_sample_weight",
- "from utils.validation import _deprecate_positional_args",
- "from utils.deprecation import deprecated"
- ],
- "classes": [
- {
- "name": "KernelRidge",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "Union[ArrayLike, float]",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. Alpha corresponds to ``1 / (2C)`` in other linear models such as :class:`~sklearn.linear_model.LogisticRegression` or :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are assumed to be specific to the targets. Hence they must correspond in number. See :ref:`ridge_regression` for formula."
- },
- {
- "name": "kernel",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "\"linear\"",
- "limitation": null,
- "ignored": false,
- "docstring": "Kernel mapping used internally. This parameter is directly passed to :class:`~sklearn.metrics.pairwise.pairwise_kernel`. If `kernel` is a string, it must be one of the metrics in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`. If `kernel` is \"precomputed\", X is assumed to be a kernel matrix. Alternatively, if `kernel` is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two rows from X as input and return the corresponding kernel value as a single number. This means that callables from :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on matrices, not single samples. Use the string identifying the kernel instead."
- },
- {
- "name": "gamma",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 and sigmoid kernels. Interpretation of the default value is left to the kernel; see the documentation for sklearn.metrics.pairwise. Ignored by other kernels."
- },
- {
- "name": "degree",
- "type": "float",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "Degree of the polynomial kernel. Ignored by other kernels."
- },
- {
- "name": "coef0",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels."
- },
- {
- "name": "kernel_params",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional parameters (keyword arguments) for kernel function passed as callable object."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_kernel",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, of shape (n_samples, n_samples)."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values"
- },
- {
- "name": "sample_weight",
- "type": "Union[ArrayLike, float]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Individual weights for each sample, ignored if None is passed."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit Kernel Ridge regression model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. If kernel == \"precomputed\" this is instead\n a precomputed kernel matrix, of shape (n_samples, n_samples).\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values\n\nsample_weight : float or array-like of shape (n_samples,), default=None\n Individual weights for each sample, ignored if None is passed.\n\nReturns\n-------\nself : returns an instance of self."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples. If kernel == \"precomputed\" this is instead a precomputed kernel matrix, shape = [n_samples, n_samples_fitted], where n_samples_fitted is the number of samples used in the fitting for this estimator."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict using the kernel ridge model\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples. If kernel == \"precomputed\" this is instead a\n precomputed kernel matrix, shape = [n_samples,\n n_samples_fitted], where n_samples_fitted is the number of\n samples used in the fitting for this estimator.\n\nReturns\n-------\nC : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Returns predicted values."
- }
- ],
- "docstring": "Kernel ridge regression.\n\nKernel ridge regression (KRR) combines ridge regression (linear least\nsquares with l2-norm regularization) with the kernel trick. It thus\nlearns a linear function in the space induced by the respective kernel and\nthe data. For non-linear kernels, this corresponds to a non-linear\nfunction in the original space.\n\nThe form of the model learned by KRR is identical to support vector\nregression (SVR). However, different loss functions are used: KRR uses\nsquared error loss while support vector regression uses epsilon-insensitive\nloss, both combined with l2 regularization. In contrast to SVR, fitting a\nKRR model can be done in closed-form and is typically faster for\nmedium-sized datasets. On the other hand, the learned model is non-sparse\nand thus slower than SVR, which learns a sparse model for epsilon > 0, at\nprediction-time.\n\nThis estimator has built-in support for multi-variate regression\n(i.e., when y is a 2d-array of shape [n_samples, n_targets]).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float or array-like of shape (n_targets,), default=1.0\n Regularization strength; must be a positive float. Regularization\n improves the conditioning of the problem and reduces the variance of\n the estimates. Larger values specify stronger regularization.\n Alpha corresponds to ``1 / (2C)`` in other linear models such as\n :class:`~sklearn.linear_model.LogisticRegression` or\n :class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\n assumed to be specific to the targets. Hence they must correspond in\n number. See :ref:`ridge_regression` for formula.\n\nkernel : string or callable, default=\"linear\"\n Kernel mapping used internally. 
This parameter is directly passed to\n :class:`~sklearn.metrics.pairwise.pairwise_kernel`.\n If `kernel` is a string, it must be one of the metrics\n in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.\n If `kernel` is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if `kernel` is a callable function, it is called on\n each pair of instances (rows) and the resulting value recorded. The\n callable should take two rows from X as input and return the\n corresponding kernel value as a single number. This means that\n callables from :mod:`sklearn.metrics.pairwise` are not allowed, as\n they operate on matrices, not single samples. Use the string\n identifying the kernel instead.\n\ngamma : float, default=None\n Gamma parameter for the RBF, laplacian, polynomial, exponential chi2\n and sigmoid kernels. Interpretation of the default value is left to\n the kernel; see the documentation for sklearn.metrics.pairwise.\n Ignored by other kernels.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : mapping of string to any, default=None\n Additional parameters (keyword arguments) for kernel function passed\n as callable object.\n\nAttributes\n----------\ndual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)\n Representation of weight vector(s) in kernel space\n\nX_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training data, which is also required for prediction. If\n kernel == \"precomputed\" this is instead the precomputed\n training matrix, of shape (n_samples, n_samples).\n\nReferences\n----------\n* Kevin P. Murphy\n \"Machine Learning: A Probabilistic Perspective\", The MIT Press\n chapter 14.4.3, pp. 
492-493\n\nSee Also\n--------\nsklearn.linear_model.Ridge : Linear ridge regression.\nsklearn.svm.SVR : Support Vector Regression implemented using libsvm.\n\nExamples\n--------\n>>> from sklearn.kernel_ridge import KernelRidge\n>>> import numpy as np\n>>> n_samples, n_features = 10, 5\n>>> rng = np.random.RandomState(0)\n>>> y = rng.randn(n_samples)\n>>> X = rng.randn(n_samples, n_features)\n>>> clf = KernelRidge(alpha=1.0)\n>>> clf.fit(X, y)\nKernelRidge(alpha=1.0)"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.multiclass",
- "imports": [
- "import array",
- "import numpy as np",
- "import warnings",
- "import scipy.sparse as sp",
- "import itertools",
- "from base import BaseEstimator",
- "from base import ClassifierMixin",
- "from base import clone",
- "from base import is_classifier",
- "from base import MultiOutputMixin",
- "from base import MetaEstimatorMixin",
- "from base import is_regressor",
- "from base import _is_pairwise",
- "from preprocessing import LabelBinarizer",
- "from metrics.pairwise import euclidean_distances",
- "from utils import check_random_state",
- "from utils.deprecation import deprecated",
- "from utils._tags import _safe_tags",
- "from utils.validation import _num_samples",
- "from utils.validation import check_is_fitted",
- "from utils.validation import check_X_y",
- "from utils.validation import check_array",
- "from utils.validation import _deprecate_positional_args",
- "from utils.multiclass import _check_partial_fit_first_call",
- "from utils.multiclass import check_classification_targets",
- "from utils.multiclass import _ovr_decision_function",
- "from utils.metaestimators import _safe_split",
- "from utils.metaestimators import if_delegate_has_method",
- "from utils.fixes import delayed",
- "from exceptions import NotFittedError",
- "from joblib import Parallel"
- ],
- "classes": [
- {
- "name": "_ConstantPredictor",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "OneVsRestClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation: the `n_classes` one-vs-rest problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nReturns\n-------\nself"
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-class targets. An indicator matrix turns on multilabel classification."
- },
- {
- "name": "classes",
- "type": "Array",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data.\nChunks of data can be passed in several iteration.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Multi-class targets. An indicator matrix turns on multilabel\n classification.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : (sparse) array-like of shape (n_samples,) or (n_samples, n_classes)\n Predicted multi-class targets."
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Probability estimates.\n\nThe returned estimates for all classes are ordered by label of classes.\n\nNote that in the multilabel case, each sample can have any number of\nlabels. This returns the marginal probability that the given sample has\nthe label in question. For example, it is entirely consistent that two\nlabels both have a 90% probability of applying to a given sample.\n\nIn the single label multiclass case, the rows of the returned matrix\nsum to 1.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : (sparse) array-like of shape (n_samples, n_classes)\n Returns the probability of the sample for each class in the model,\n where classes are ordered as they are in `self.classes_`."
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the distance of each sample from the decision boundary for\neach class. This can only be used with estimators which implement the\ndecision_function method.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nT : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification."
- },
- {
- "name": "multilabel_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Whether this is a multilabel classifier"
- },
- {
- "name": "n_classes_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "coef_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "intercept_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix"
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix"
- },
- {
- "name": "_first_estimator",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "n_features_in_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "One-vs-the-rest (OvR) multiclass strategy.\n\nAlso known as one-vs-all, this strategy consists in fitting one classifier\nper class. For each classifier, the class is fitted against all the other\nclasses. In addition to its computational efficiency (only `n_classes`\nclassifiers are needed), one advantage of this approach is its\ninterpretability. Since each class is represented by one and one classifier\nonly, it is possible to gain knowledge about the class by inspecting its\ncorresponding classifier. This is the most commonly used strategy for\nmulticlass classification and is a fair default choice.\n\nOneVsRestClassifier can also be used for multilabel classification. To use\nthis feature, provide an indicator matrix for the target `y` when calling\n`.fit`. In other words, the target labels should be formatted as a 2D\nbinary (0/1) matrix, where [i, j] == 1 indicates the presence of label j\nin sample i. This estimator uses the binary relevance method to perform\nmultilabel classification, which involves training one binary classifier\nindependently for each label.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes`\n one-vs-rest problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of `n_classes` estimators\n Estimators used for predictions.\n\ncoef_ : ndarray of shape (1, n_features) or (n_classes, n_features)\n Coefficient of the features in the decision function. This attribute\n exists only if the ``estimators_`` defines ``coef_``.\n\n .. 
deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nintercept_ : ndarray of shape (1, 1) or (n_classes, 1)\n If ``y`` is binary, the shape is ``(1, 1)`` else ``(n_classes, 1)``\n This attribute exists only if the ``estimators_`` defines\n ``intercept_``.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will\n be removed in 1.1 (renaming of 0.26). If you use this attribute\n in :class:`~sklearn.feature_selection.RFE` or\n :class:`~sklearn.feature_selection.SelectFromModel`,\n you may pass a callable to the `importance_getter`\n parameter that extracts feature the importances\n from `estimators_`.\n\nclasses_ : array, shape = [`n_classes`]\n Class labels.\n\nn_classes_ : int\n Number of classes.\n\nlabel_binarizer_ : LabelBinarizer object\n Object used to transform multiclass labels to binary labels and\n vice-versa.\n\nmultilabel_ : boolean\n Whether a OneVsRestClassifier is a multilabel classifier.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.multiclass import OneVsRestClassifier\n>>> from sklearn.svm import SVC\n>>> X = np.array([\n... [10, 10],\n... [8, 10],\n... [-5, 5.5],\n... [-5.4, 5.5],\n... [-20, -20],\n... [-15, -20]\n... ])\n>>> y = np.array([0, 0, 1, 1, 2, 2])\n>>> clf = OneVsRestClassifier(SVC()).fit(X, y)\n>>> clf.predict([[-19, -20], [9, 9], [-5, 5]])\narray([2, 0, 1])\n\nSee Also\n--------\nsklearn.multioutput.MultiOutputClassifier : Alternate way of extending an\n estimator for multilabel classification.\nsklearn.preprocessing.MultiLabelBinarizer : Transform iterable of iterables\n to binary indicator matrix."
- },
- {
- "name": "OneVsOneClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation: the `n_classes * ( n_classes - 1) / 2` OVO problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-class targets."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nReturns\n-------\nself"
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-class targets."
- },
- {
- "name": "classes",
- "type": "Array",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Classes across all calls to partial_fit. Can be obtained via `np.unique(y_all)`, where y_all is the target vector of the entire dataset. This argument is only required in the first call of partial_fit and can be omitted in the subsequent calls."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Partially fit underlying estimators\n\nShould be used when memory is inefficient to train all data. Chunks\nof data can be passed in several iteration, where the first call\nshould have an array of all target variables.\n\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : array-like of shape (n_samples,)\n Multi-class targets.\n\nclasses : array, shape (n_classes, )\n Classes across all calls to partial_fit.\n Can be obtained via `np.unique(y_all)`, where y_all is the\n target vector of the entire dataset.\n This argument is only required in the first call of partial_fit\n and can be omitted in the subsequent calls.\n\nReturns\n-------\nself"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Estimate the best class label for each sample in X.\n\nThis is implemented as ``argmax(decision_function(X), axis=1)`` which\nwill return the label of the class with most votes by estimators\npredicting the outcome of a decision for each possible class pair.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets."
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Decision function for the OneVsOneClassifier.\n\nThe decision values for the samples are computed by adding the\nnormalized sum of pair-wise classification confidence levels to the\nvotes in order to disambiguate between the decision values when the\nvotes for all the classes are equal leading to a tie.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY : array-like of shape (n_samples, n_classes) or (n_samples,) for binary classification.\n\n .. versionchanged:: 0.19\n output shape changed to ``(n_samples,)`` to conform to\n scikit-learn conventions for binary classification."
- },
- {
- "name": "n_classes_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix"
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Indicate if wrapped estimator is using a precomputed Gram matrix"
- }
- ],
- "docstring": "One-vs-one multiclass strategy\n\nThis strategy consists in fitting one classifier per class pair.\nAt prediction time, the class which received the most votes is selected.\nSince it requires to fit `n_classes * (n_classes - 1) / 2` classifiers,\nthis method is usually slower than one-vs-the-rest, due to its\nO(n_classes^2) complexity. However, this method may be advantageous for\nalgorithms such as kernel algorithms which don't scale well with\n`n_samples`. This is because each individual learning problem only involves\na small subset of the data whereas, with one-vs-the-rest, the complete\ndataset is used `n_classes` times.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the `n_classes * (\n n_classes - 1) / 2` OVO problems are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of ``n_classes * (n_classes - 1) / 2`` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\nn_classes_ : int\n Number of classes\n\npairwise_indices_ : list, length = ``len(estimators_)``, or ``None``\n Indices of samples used when training the estimators.\n ``None`` when ``estimator``'s `pairwise` tag is False.\n\n .. deprecated:: 0.24\n\n The _pairwise attribute is deprecated in 0.24. 
From 1.1\n (renaming of 0.25) and onward, `pairwise_indices_` will use the\n pairwise estimator tag instead.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multiclass import OneVsOneClassifier\n>>> from sklearn.svm import LinearSVC\n>>> X, y = load_iris(return_X_y=True)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.33, shuffle=True, random_state=0)\n>>> clf = OneVsOneClassifier(\n... LinearSVC(random_state=0)).fit(X_train, y_train)\n>>> clf.predict(X_test[:10])\narray([2, 1, 0, 2, 0, 2, 0, 1, 1, 1])"
- },
- {
- "name": "OutputCodeClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "An estimator object implementing :term:`fit` and one of :term:`decision_function` or :term:`predict_proba`."
- },
- {
- "name": "code_size",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Percentage of the number of classes to be used to create the code book. A number between 0 and 1 will require fewer classifiers than one-vs-the-rest. A number greater than 1 will require more classifiers than one-vs-the-rest."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The generator used to initialize the codebook. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation: the multiclass problems are computed in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-class targets."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\ny : numpy array of shape [n_samples]\n Multi-class targets.\n\nReturns\n-------\nself"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Predict multi-class targets using underlying estimators.\n\nParameters\n----------\nX : (sparse) array-like of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : numpy array of shape [n_samples]\n Predicted multi-class targets."
- }
- ],
- "docstring": "(Error-Correcting) Output-Code multiclass strategy\n\nOutput-code based strategies consist in representing each class with a\nbinary code (an array of 0s and 1s). At fitting time, one binary\nclassifier per bit in the code book is fitted. At prediction time, the\nclassifiers are used to project new points in the class space and the class\nclosest to the points is chosen. The main advantage of these strategies is\nthat the number of classifiers used can be controlled by the user, either\nfor compressing the model (0 < code_size < 1) or for making the model more\nrobust to errors (code_size > 1). See the documentation for more details.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and one of\n :term:`decision_function` or :term:`predict_proba`.\n\ncode_size : float\n Percentage of the number of classes to be used to create the code book.\n A number between 0 and 1 will require fewer classifiers than\n one-vs-the-rest. A number greater than 1 will require more classifiers\n than one-vs-the-rest.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to initialize the codebook.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation: the multiclass problems\n are computed in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. 
See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nestimators_ : list of `int(n_classes * code_size)` estimators\n Estimators used for predictions.\n\nclasses_ : numpy array of shape [n_classes]\n Array containing labels.\n\ncode_book_ : numpy array of shape [n_classes, code_size]\n Binary array containing the code of each class.\n\nExamples\n--------\n>>> from sklearn.multiclass import OutputCodeClassifier\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.datasets import make_classification\n>>> X, y = make_classification(n_samples=100, n_features=4,\n... n_informative=2, n_redundant=0,\n... random_state=0, shuffle=False)\n>>> clf = OutputCodeClassifier(\n... estimator=RandomForestClassifier(random_state=0),\n... random_state=0).fit(X, y)\n>>> clf.predict([[0, 0, 0, 0]])\narray([1])\n\nReferences\n----------\n\n.. [1] \"Solving multiclass learning problems via error-correcting output\n codes\",\n Dietterich T., Bakiri G.,\n Journal of Artificial Intelligence Research 2,\n 1995.\n\n.. [2] \"The error coding method and PICTs\",\n James G., Hastie T.,\n Journal of Computational and Graphical statistics 7,\n 1998.\n\n.. [3] \"The Elements of Statistical Learning\",\n Hastie T., Tibshirani R., Friedman J., page 606 (second-edition)\n 2008."
- }
- ],
- "functions": [
- {
- "name": "_fit_binary",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit a single binary estimator."
- },
- {
- "name": "_partial_fit_binary",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Partially fit a single binary estimator."
- },
- {
- "name": "_predict_binary",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Make predictions using a single binary estimator."
- },
- {
- "name": "_check_estimator",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Make sure that an estimator implements the necessary methods."
- },
- {
- "name": "_fit_ovo_binary",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit a single binary estimator (one-vs-one)."
- },
- {
- "name": "_partial_fit_ovo_binary",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Partially fit a single binary estimator(one-vs-one)."
- }
- ]
- },
- {
- "name": "sklearn.multioutput",
- "imports": [
- "import numpy as np",
- "import scipy.sparse as sp",
- "from joblib import Parallel",
- "from abc import ABCMeta",
- "from abc import abstractmethod",
- "from base import BaseEstimator",
- "from base import clone",
- "from base import MetaEstimatorMixin",
- "from base import RegressorMixin",
- "from base import ClassifierMixin",
- "from base import is_classifier",
- "from model_selection import cross_val_predict",
- "from utils import check_array",
- "from utils import check_X_y",
- "from utils import check_random_state",
- "from utils.metaestimators import if_delegate_has_method",
- "from utils.validation import check_is_fitted",
- "from utils.validation import has_fit_parameter",
- "from utils.validation import _check_fit_params",
- "from utils.validation import _deprecate_positional_args",
- "from utils.multiclass import check_classification_targets",
- "from utils.fixes import delayed"
- ],
- "classes": [
- {
- "name": "_MultiOutputEstimator",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-output targets."
- },
- {
- "name": "classes",
- "type": "List[NDArray]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Each array is unique classes for one output in str/int Can be obtained by via ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the target matrix of the entire dataset. This argument is required for the first call to partial_fit and can be omitted in the subsequent calls. Note that y doesn't need to contain all labels in `classes`."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nclasses : list of ndarray of shape (n_outputs,)\n Each array is unique classes for one output in str/int\n Can be obtained by via\n ``[np.unique(y[:, i]) for i in range(y.shape[1])]``, where y is the\n target matrix of the entire dataset.\n This argument is required for the first call to partial_fit\n and can be omitted in the subsequent calls.\n Note that y doesn't need to contain all labels in `classes`.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-output targets. An indicator matrix turns on multilabel estimation."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets. An indicator matrix turns on multilabel\n estimation.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict multi-output variable using a model\n trained for each target variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\nReturns\n-------\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets predicted across multiple predictors.\n Note: Separate models are generated for each predictor."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "MultiOutputRegressor",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "An estimator object implementing :term:`fit` and :term:`predict`."
- },
- {
- "name": "n_jobs",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multi-output targets."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying regressor supports sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Incrementally fit the model to data.\nFit a separate model for each output variable.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Data.\n\ny : {array-like, sparse matrix} of shape (n_samples, n_outputs)\n Multi-output targets.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying regressor supports sample\n weights.\n\nReturns\n-------\nself : object"
- }
- ],
- "docstring": "Multi target regression\n\nThis strategy consists of fitting one regressor per target. This is a\nsimple strategy for extending regressors that do not natively support\nmulti-target regression.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit` and :term:`predict`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import load_linnerud\n>>> from sklearn.multioutput import MultiOutputRegressor\n>>> from sklearn.linear_model import Ridge\n>>> X, y = load_linnerud(return_X_y=True)\n>>> clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)\n>>> clf.predict(X[[0]])\narray([[176..., 35..., 57...]])"
- },
- {
- "name": "MultiOutputClassifier",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "An estimator object implementing :term:`fit`, :term:`score` and :term:`predict_proba`."
- },
- {
- "name": "n_jobs",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported by the passed estimator) will be parallelized for each target. When individual estimators are fast to train or predict, using ``n_jobs > 1`` can result in slower performance due to the parallelism overhead. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all available processes / threads. See :term:`Glossary ` for more details. .. versionchanged:: 0.20 `n_jobs` default changed from 1 to None"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights. If None, then samples are equally weighted. Only supported if the underlying classifier supports sample weights."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``estimator.fit`` method of each step. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Only supported if the underlying classifier supports sample\n weights.\n**fit_params : dict of string -> object\n Parameters passed to the ``estimator.fit`` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Probability estimates.\nReturns prediction probabilities for each class of each output.\n\nThis method will raise a ``ValueError`` if any of the\nestimators do not have ``predict_proba``.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data\n\nReturns\n-------\np : array of shape (n_samples, n_classes), or a list of n_outputs such arrays if n_outputs > 1.\n The class probabilities of the input samples. The order of the\n classes corresponds to that in the attribute :term:`classes_`.\n\n .. versionchanged:: 0.19\n This function now returns a list of arrays where the length of\n the list is ``n_outputs``, and each array is (``n_samples``,\n ``n_classes``) for that particular output."
- },
- {
- "name": "_predict_proba",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples"
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True values for X"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the mean accuracy on the given test data and labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples\n\ny : array-like of shape (n_samples, n_outputs)\n True values for X\n\nReturns\n-------\nscores : float\n accuracy_score of self.predict(X) versus y"
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Multi target classification\n\nThis strategy consists of fitting one classifier per target. This is a\nsimple strategy for extending classifiers that do not natively support\nmulti-target classification\n\nParameters\n----------\nestimator : estimator object\n An estimator object implementing :term:`fit`, :term:`score` and\n :term:`predict_proba`.\n\nn_jobs : int or None, optional (default=None)\n The number of jobs to run in parallel.\n :meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\n by the passed estimator) will be parallelized for each target.\n\n When individual estimators are fast to train or predict,\n using ``n_jobs > 1`` can result in slower performance due\n to the parallelism overhead.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all available processes / threads.\n See :term:`Glossary ` for more details.\n\n .. versionchanged:: 0.20\n `n_jobs` default changed from 1 to None\n\nAttributes\n----------\nclasses_ : ndarray of shape (n_classes,)\n Class labels.\n\nestimators_ : list of ``n_output`` estimators\n Estimators used for predictions.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.multioutput import MultiOutputClassifier\n>>> from sklearn.neighbors import KNeighborsClassifier\n\n>>> X, y = make_multilabel_classification(n_classes=3, random_state=0)\n>>> clf = MultiOutputClassifier(KNeighborsClassifier()).fit(X, y)\n>>> clf.predict(X[-2:])\narray([[1, 1, 0], [1, 1, 1]])"
- },
- {
- "name": "_BaseChain",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The target values."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the `fit` method of each step. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method of each step.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict on the data matrix X using the ClassifierChain model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\n\nReturns\n-------\nY_pred : array-like of shape (n_samples, n_classes)\n The predicted values."
- }
- ],
- "docstring": null
- },
- {
- "name": "ClassifierChain",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "base_estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The base estimator from which the classifier chain is built."
- },
- {
- "name": "order",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used."
- },
- {
- "name": "cv",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The target values."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict probability estimates.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n\nReturns\n-------\nY_prob : array-like of shape (n_samples, n_classes)"
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Evaluate the decision_function of the models in the chain.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nY_decision : array-like of shape (n_samples, n_classes)\n Returns the decision function of the sample for each model\n in the chain."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "A multi-label model that arranges binary classifiers into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.19\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nclasses_ : list\n A list of arrays of length ``len(estimators_)`` containing the\n class labels for each estimator in the chain.\n\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.datasets import make_multilabel_classification\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.multioutput import ClassifierChain\n>>> X, Y = make_multilabel_classification(\n... n_samples=12, n_classes=3, random_state=0\n... )\n>>> X_train, X_test, Y_train, Y_test = train_test_split(\n... X, Y, random_state=0\n... )\n>>> base_lr = LogisticRegression(solver='lbfgs', random_state=0)\n>>> chain = ClassifierChain(base_lr, order='random', random_state=0)\n>>> chain.fit(X_train, Y_train).predict(X_test)\narray([[1., 1., 0.],\n [1., 0., 0.],\n [0., 1., 0.]])\n>>> chain.predict_proba(X_test)\narray([[0.8387..., 0.9431..., 0.4576...],\n [0.8878..., 0.3684..., 0.2640...],\n [0.0321..., 0.9935..., 0.0625...]])\n\nSee Also\n--------\nRegressorChain : Equivalent for regression.\nMultioutputClassifier : Classifies each output independently rather than\n chaining.\n\nReferences\n----------\nJesse Read, Bernhard Pfahringer, Geoff Holmes, Eibe Frank, \"Classifier\nChains for Multi-label Classification\", 2009."
- },
- {
- "name": "RegressorChain",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "base_estimator",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The base estimator from which the classifier chain is built."
- },
- {
- "name": "order",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If None, the order will be determined by the order of columns in the label matrix Y.:: order = [0, 1, 2, ..., Y.shape[1] - 1] The order of the chain can be explicitly set by providing a list of integers. For example, for a chain of length 5.:: order = [1, 3, 2, 4, 0] means that the first model in the chain will make predictions for column 1 in the Y matrix, the second model will make predictions for column 3, etc. If order is 'random' a random ordering will be used."
- },
- {
- "name": "cv",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines whether to use cross validated predictions or true labels for the results of previous estimators in the chain. Possible inputs for cv are: - None, to use true labels when fitting, - integer, to specify the number of folds in a (Stratified)KFold, - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``order='random'``, determines random number generation for the chain order. In addition, it controls the random seed given at each `base_estimator` at each chaining iteration. Thus, it is only used when `base_estimator` exposes a `random_state`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The target values."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the `fit` method at each step of the regressor chain. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model to data matrix X and targets Y.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The input data.\nY : array-like of shape (n_samples, n_classes)\n The target values.\n\n**fit_params : dict of string -> object\n Parameters passed to the `fit` method at each step\n of the regressor chain.\n\n .. versionadded:: 0.23\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "A multi-label model that arranges regressions into a chain.\n\nEach model makes a prediction in the order specified by the chain using\nall of the available features provided to the model plus the predictions\nof models that are earlier in the chain.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nbase_estimator : estimator\n The base estimator from which the classifier chain is built.\n\norder : array-like of shape (n_outputs,) or 'random', default=None\n If None, the order will be determined by the order of columns in\n the label matrix Y.::\n\n order = [0, 1, 2, ..., Y.shape[1] - 1]\n\n The order of the chain can be explicitly set by providing a list of\n integers. For example, for a chain of length 5.::\n\n order = [1, 3, 2, 4, 0]\n\n means that the first model in the chain will make predictions for\n column 1 in the Y matrix, the second model will make predictions\n for column 3, etc.\n\n If order is 'random' a random ordering will be used.\n\ncv : int, cross-validation generator or an iterable, default=None\n Determines whether to use cross validated predictions or true\n labels for the results of previous estimators in the chain.\n Possible inputs for cv are:\n\n - None, to use true labels when fitting,\n - integer, to specify the number of folds in a (Stratified)KFold,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\nrandom_state : int, RandomState instance or None, optional (default=None)\n If ``order='random'``, determines random number generation for the\n chain order.\n In addition, it controls the random seed given at each `base_estimator`\n at each chaining iteration. 
Thus, it is only used when `base_estimator`\n exposes a `random_state`.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nestimators_ : list\n A list of clones of base_estimator.\n\norder_ : list\n The order of labels in the classifier chain.\n\nExamples\n--------\n>>> from sklearn.multioutput import RegressorChain\n>>> from sklearn.linear_model import LogisticRegression\n>>> logreg = LogisticRegression(solver='lbfgs',multi_class='multinomial')\n>>> X, Y = [[1, 0], [0, 1], [1, 1]], [[0, 2], [1, 1], [2, 0]]\n>>> chain = RegressorChain(base_estimator=logreg, order=[0, 1]).fit(X, Y)\n>>> chain.predict(X)\narray([[0., 2.],\n [1., 1.],\n [2., 0.]])\n\nSee Also\n--------\nClassifierChain : Equivalent for classification.\nMultioutputRegressor : Learns each output independently rather than\n chaining."
- }
- ],
- "functions": [
- {
- "name": "_fit_estimator",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_partial_fit_estimator",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.naive_bayes",
- "imports": [
- "import warnings",
- "from abc import ABCMeta",
- "from abc import abstractmethod",
- "import numpy as np",
- "from scipy.special import logsumexp",
- "from base import BaseEstimator",
- "from base import ClassifierMixin",
- "from preprocessing import binarize",
- "from preprocessing import LabelBinarizer",
- "from preprocessing import label_binarize",
- "from utils import check_X_y",
- "from utils import check_array",
- "from utils import deprecated",
- "from utils.extmath import safe_sparse_dot",
- "from utils.multiclass import _check_partial_fit_first_call",
- "from utils.validation import check_is_fitted",
- "from utils.validation import check_non_negative",
- "from utils.validation import column_or_1d",
- "from utils.validation import _check_sample_weight",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "_BaseNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute the unnormalized posterior log probability of X\n\nI.e. ``log P(c) + log P(x|c)`` for all rows x of X, as an array-like of\nshape (n_classes, n_samples).\n\nInput is passed to _joint_log_likelihood as-is by predict,\npredict_proba and predict_log_proba."
- },
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "To be overridden in subclasses with the actual checks."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform classification on an array of test vectors X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : ndarray of shape (n_samples,)\n Predicted target values for X"
- },
- {
- "name": "predict_log_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return log-probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the log-probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`."
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return probability estimates for the test vector X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\nReturns\n-------\nC : array-like of shape (n_samples, n_classes)\n Returns the probability of the samples for each class in\n the model. The columns correspond to the classes in sorted\n order, as they appear in the attribute :term:`classes_`."
- }
- ],
- "docstring": "Abstract base class for naive Bayes estimators"
- },
- {
- "name": "GaussianNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "priors",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data."
- },
- {
- "name": "var_smoothing",
- "type": "float",
- "hasDefault": true,
- "default": "1e-9",
- "limitation": null,
- "ignored": false,
- "docstring": "Portion of the largest variance of all features that is added to variances for calculation stability. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17 Gaussian Naive Bayes supports fitting with *sample_weight*."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit Gaussian Naive Bayes according to X, y\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples\n and n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_update_mean_variance",
- "decorators": [],
- "parameters": [
- {
- "name": "n_past",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of samples represented in old mean and variance. If sample weights were given, this should contain the sum of sample weights represented in old mean and variance."
- },
- {
- "name": "mu",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Means for Gaussians in original set."
- },
- {
- "name": "var",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Variances for Gaussians in original set."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute online update of Gaussian mean and variance.\n\nGiven starting sample count, mean, and variance, a new set of\npoints X, and optionally sample weights, return the updated mean and\nvariance. (NB - each dimension (column) in X is treated as independent\n-- you get variance, not covariance).\n\nCan take scalar mean and variance, or vector mean and variance to\nsimultaneously update a number of independent Gaussians.\n\nSee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nParameters\n----------\nn_past : int\n Number of samples represented in old mean and variance. If sample\n weights were given, this should contain the sum of sample\n weights represented in old mean and variance.\n\nmu : array-like of shape (number of Gaussians,)\n Means for Gaussians in original set.\n\nvar : array-like of shape (number of Gaussians,)\n Variances for Gaussians in original set.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\ntotal_mu : array-like of shape (number of Gaussians,)\n Updated mean for each Gaussian over the combined set.\n\ntotal_var : array-like of shape (number of Gaussians,)\n Updated variance for each Gaussian over the combined set."
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "classes",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted). .. versionadded:: 0.17"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance and numerical stability overhead,\nhence it is better to call partial_fit on chunks of data that are\nas large as possible (as long as fitting in the memory budget) to\nhide the overhead.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\n .. versionadded:: 0.17\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "classes",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls."
- },
- {
- "name": "_refit",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If true, act as though this were the first time we called _partial_fit (ie, throw away any past fitting and start over)."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Actual implementation of Gaussian NB fitting.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes,), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\n_refit : bool, default=False\n If true, act as though this were the first time we called\n _partial_fit (ie, throw away any past fitting and start over).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Gaussian Naive Bayes (GaussianNB)\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n class labels known to the classifier\n\nepsilon_ : float\n absolute additive value to variances\n\nsigma_ : ndarray of shape (n_classes, n_features)\n variance of each feature per class\n\ntheta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]"
- },
- {
- "name": "_BaseDiscreteNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_X_y",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_update_class_log_prior",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_alpha",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "classes",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object"
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_init_counters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "coef_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "intercept_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Abstract base class for naive Bayes on discrete/categorical data\n\nAny estimator based on this class should provide:\n\n__init__\n_joint_log_likelihood(X) as per _BaseNB"
- },
- {
- "name": "MultinomialNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)."
- },
- {
- "name": "fit_prior",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used."
- },
- {
- "name": "class_prior",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_count",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Count and smooth feature occurrences."
- },
- {
- "name": "_update_feature_log_prob",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply smoothing to raw counts and recompute log probabilities"
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculate the posterior log probability of the samples X"
- }
- ],
- "docstring": "Naive Bayes classifier for multinomial models\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes, )\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features\n given a class, ``P(x_i|y)``.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `MultinomialNB`\n as a linear model.\n\n .. 
deprecated:: 0.24\n ``intercept_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import MultinomialNB\n>>> clf = MultinomialNB()\n>>> clf.fit(X, y)\nMultinomialNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nNotes\n-----\nFor the rationale behind the names `coef_` and `intercept_`, i.e.\nnaive Bayes as a linear classifier, see J. Rennie et al. (2003),\nTackling the poor assumptions of naive Bayes text classifiers, ICML.\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html"
- },
- {
- "name": "ComplementNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)."
- },
- {
- "name": "fit_prior",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Only used in edge case with a single class in the training set."
- },
- {
- "name": "class_prior",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prior probabilities of the classes. Not used."
- },
- {
- "name": "norm",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not a second normalization of the weights is performed. The default behavior mirrors the implementations found in Mahout and Weka, which do not follow the full algorithm described in Table 9 of the paper."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_count",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Count feature occurrences."
- },
- {
- "name": "_update_feature_log_prob",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply smoothing to raw counts and compute the weights."
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculate the class scores for the samples in X."
- }
- ],
- "docstring": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).\n\nfit_prior : bool, default=True\n Only used in edge case with a single class in the training set.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. Not used.\n\nnorm : bool, default=False\n Whether or not a second normalization of the weights is performed. The\n default behavior mirrors the implementations found in Mahout and Weka,\n which do not follow the full algorithm described in Table 9 of the\n paper.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class. Only used in edge\n case with a single class in the training set.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nfeature_all_ : ndarray of shape (n_features,)\n Number of samples encountered for each feature during fitting. 
This\n value is weighted by the sample weight when provided.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature) during fitting.\n This value is weighted by the sample weight when provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical weights for class complements.\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `ComplementNB`\n as a linear model.\n\n .. deprecated:: 0.24\n ``coef_`` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26).\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import ComplementNB\n>>> clf = ComplementNB()\n>>> clf.fit(X, y)\nComplementNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nRennie, J. D., Shih, L., Teevan, J., & Karger, D. R. (2003).\nTackling the poor assumptions of naive bayes text classifiers. In ICML\n(Vol. 3, pp. 616-623).\nhttps://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf"
- },
- {
- "name": "BernoulliNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)."
- },
- {
- "name": "binarize",
- "type": "Optional[float]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors."
- },
- {
- "name": "fit_prior",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used."
- },
- {
- "name": "class_prior",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_X_y",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_count",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Count and smooth feature occurrences."
- },
- {
- "name": "_update_feature_log_prob",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply smoothing to raw counts and recompute log probabilities"
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculate the posterior log probability of the samples X"
- }
- ],
- "docstring": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nbinarize : float or None, default=0.0\n Threshold for binarizing (mapping to booleans) of sample features.\n If None, input is presumed to already consist of binary vectors.\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes)\n Number of samples encountered for each class during fitting. This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes)\n Log probability of each class (smoothed).\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\ncoef_ : ndarray of shape (n_classes, n_features)\n Mirrors ``feature_log_prob_`` for interpreting `BernoulliNB`\n as a linear model.\n\nfeature_count_ : ndarray of shape (n_classes, n_features)\n Number of samples encountered for each (class, feature)\n during fitting. 
This value is weighted by the sample weight when\n provided.\n\nfeature_log_prob_ : ndarray of shape (n_classes, n_features)\n Empirical log probability of features given a class, P(x_i|y).\n\nintercept_ : ndarray of shape (n_classes,)\n Mirrors ``class_log_prior_`` for interpreting `BernoulliNB`\n as a linear model.\n\nn_features_ : int\n Number of features of each sample.\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> Y = np.array([1, 2, 3, 4, 4, 5])\n>>> from sklearn.naive_bayes import BernoulliNB\n>>> clf = BernoulliNB()\n>>> clf.fit(X, Y)\nBernoulliNB()\n>>> print(clf.predict(X[2:3]))\n[3]\n\nReferences\n----------\nC.D. Manning, P. Raghavan and H. Schuetze (2008). Introduction to\nInformation Retrieval. Cambridge University Press, pp. 234-265.\nhttps://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html\n\nA. McCallum and K. Nigam (1998). A comparison of event models for naive\nBayes text classification. Proc. AAAI/ICML-98 Workshop on Learning for\nText Categorization, pp. 41-48.\n\nV. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with\nnaive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS)."
- },
- {
- "name": "CategoricalNB",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing)."
- },
- {
- "name": "fit_prior",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used."
- },
- {
- "name": "class_prior",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Prior probabilities of the classes. If specified the priors are not adjusted according to the data."
- },
- {
- "name": "min_categories",
- "type": "Union[ArrayLike, int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Minimum number of categories per feature. - integer: Sets the minimum number of categories per feature to `n_categories` for each features. - array-like: shape (n_features,) where `n_categories[i]` holds the minimum number of categories for the ith column of the input. - None (default): Determines the number of categories automatically from the training data. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit Naive Bayes classifier according to X, y\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object"
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of features. Here, each feature of X is assumed to be from a different categorical distribution. It is further assumed that all categories of each feature are represented by the numbers 0, ..., n - 1, where n refers to the total number of categories for the given feature. This can, for instance, be achieved with the help of OrdinalEncoder."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "classes",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weights applied to individual samples (1. for unweighted)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Incremental fit on a batch of samples.\n\nThis method is expected to be called several times consecutively\non different chunks of a dataset so as to implement out-of-core\nor online learning.\n\nThis is especially useful when the whole dataset is too big to fit in\nmemory at once.\n\nThis method has some performance overhead hence it is better to call\npartial_fit on chunks of data that are as large as possible\n(as long as fitting in the memory budget) to hide the overhead.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of features. Here, each feature of X is\n assumed to be from a different categorical distribution.\n It is further assumed that all categories of each feature are\n represented by the numbers 0, ..., n - 1, where n refers to the\n total number of categories for the given feature. This can, for\n instance, be achieved with the help of OrdinalEncoder.\n\ny : array-like of shape (n_samples)\n Target values.\n\nclasses : array-like of shape (n_classes), default=None\n List of all the classes that can possibly appear in the y vector.\n\n Must be provided at the first call to partial_fit, can be omitted\n in subsequent calls.\n\nsample_weight : array-like of shape (n_samples), default=None\n Weights applied to individual samples (1. for unweighted).\n\nReturns\n-------\nself : object"
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_X_y",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_init_counters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_validate_n_categories",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_count",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_update_feature_log_prob",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_joint_log_likelihood",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Naive Bayes classifier for categorical features\n\nThe categorical Naive Bayes classifier is suitable for classification with\ndiscrete features that are categorically distributed. The categories of\neach feature are drawn from a categorical distribution.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Additive (Laplace/Lidstone) smoothing parameter\n (0 for no smoothing).\n\nfit_prior : bool, default=True\n Whether to learn class prior probabilities or not.\n If false, a uniform prior will be used.\n\nclass_prior : array-like of shape (n_classes,), default=None\n Prior probabilities of the classes. If specified the priors are not\n adjusted according to the data.\n\nmin_categories : int or array-like of shape (n_features,), default=None\n Minimum number of categories per feature.\n\n - integer: Sets the minimum number of categories per feature to\n `n_categories` for each features.\n - array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n - None (default): Determines the number of categories automatically\n from the training data.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncategory_count_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the number of samples\n encountered for each class and category of the specific feature.\n\nclass_count_ : ndarray of shape (n_classes,)\n Number of samples encountered for each class during fitting. 
This\n value is weighted by the sample weight when provided.\n\nclass_log_prior_ : ndarray of shape (n_classes,)\n Smoothed empirical log probability for each class.\n\nclasses_ : ndarray of shape (n_classes,)\n Class labels known to the classifier\n\nfeature_log_prob_ : list of arrays of shape (n_features,)\n Holds arrays of shape (n_classes, n_categories of respective feature)\n for each feature. Each array provides the empirical log probability\n of categories given the respective feature and class, ``P(x_i|y)``.\n\nn_features_ : int\n Number of features of each sample.\n\nn_categories_ : ndarray of shape (n_features,), dtype=np.int64\n Number of categories for each feature. This value is\n inferred from the data or set by the minimum number of categories.\n\n .. versionadded:: 0.24\n\nExamples\n--------\n>>> import numpy as np\n>>> rng = np.random.RandomState(1)\n>>> X = rng.randint(5, size=(6, 100))\n>>> y = np.array([1, 2, 3, 4, 5, 6])\n>>> from sklearn.naive_bayes import CategoricalNB\n>>> clf = CategoricalNB()\n>>> clf.fit(X, y)\nCategoricalNB()\n>>> print(clf.predict(X[2:3]))\n[3]"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.pipeline",
- "imports": [
- "from collections import defaultdict",
- "from itertools import islice",
- "import numpy as np",
- "from scipy import sparse",
- "from joblib import Parallel",
- "from base import clone",
- "from base import TransformerMixin",
- "from utils._estimator_html_repr import _VisualBlock",
- "from utils.metaestimators import if_delegate_has_method",
- "from utils import Bunch",
- "from utils import _print_elapsed_time",
- "from utils.deprecation import deprecated",
- "from utils._tags import _safe_tags",
- "from utils.validation import check_memory",
- "from utils.validation import _deprecate_positional_args",
- "from utils.fixes import delayed",
- "from utils.metaestimators import _BaseComposition"
- ],
- "classes": [
- {
- "name": "Pipeline",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "steps",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of (name, transform) tuples (implementing fit/transform) that are chained, in the order in which they are chained, with the last object an estimator."
- },
- {
- "name": "memory",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "get_params",
- "decorators": [],
- "parameters": [
- {
- "name": "deep",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `steps` of the `Pipeline`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values."
- },
- {
- "name": "set_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`steps`.\n\nReturns\n-------\nself"
- },
- {
- "name": "_validate_steps",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_iter",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate (idx, (name, trans)) tuples from self.steps\n\nWhen filter_passthrough is True, 'passthrough' and None transformers\nare filtered out."
- },
- {
- "name": "__len__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the length of the Pipeline"
- },
- {
- "name": "__getitem__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns a sub-pipeline or a single esimtator in the pipeline\n\nIndexing with an integer will return an estimator; using a slice\nreturns another Pipeline instance which copies a slice of this\nPipeline. This copy is shallow: modifying (or fitting) estimators in\nthe sub-pipeline will affect the larger pipeline and vice-versa.\nHowever, replacing a value in `step` will not affect a copy."
- },
- {
- "name": "_estimator_type",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "named_steps",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_final_estimator",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_log_message",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_fit_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data. Must fulfill input requirements of first step of the pipeline."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model\n\nFit all the transforms one after the other and transform the\ndata, then fit the transformed data using the final estimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nself : Pipeline\n This estimator"
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data. Must fulfill input requirements of first step of the pipeline."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit the model and transform with the final estimator\n\nFits all the transforms one after the other and transforms the\ndata, then uses fit_transform on transformed data with the final\nestimator.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of the\n pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps of\n the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)\n Transformed samples"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- },
- {
- "name": "**predict_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters to the ``predict`` called at the end of all transformations in the pipeline. Note that while this may be used to return uncertainties from some models with return_std or return_cov, uncertainties that are generated by the transformations in the pipeline are not propagated to the final estimator. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms to the data, and predict with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\n**predict_params : dict of string -> object\n Parameters to the ``predict`` called at the end of all\n transformations in the pipeline. Note that while this may be\n used to return uncertainties from some models with return_std\n or return_cov, uncertainties that are generated by the\n transformations in the pipeline are not propagated to the\n final estimator.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ny_pred : array-like"
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data. Must fulfill input requirements of first step of the pipeline."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training targets. Must fulfill label requirements for all steps of the pipeline."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``fit`` method of each step, where each parameter name is prefixed such that parameter ``p`` for step ``s`` has key ``s__p``."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Applies fit_predict of last step in pipeline after transforms.\n\nApplies fit_transforms of a pipeline to the data, followed by the\nfit_predict method of the final estimator in the pipeline. Valid\nonly if the final estimator implements fit_predict.\n\nParameters\n----------\nX : iterable\n Training data. Must fulfill input requirements of first step of\n the pipeline.\n\ny : iterable, default=None\n Training targets. Must fulfill label requirements for all steps\n of the pipeline.\n\n**fit_params : dict of string -> object\n Parameters passed to the ``fit`` method of each step, where\n each parameter name is prefixed such that parameter ``p`` for step\n ``s`` has key ``s__p``.\n\nReturns\n-------\ny_pred : array-like"
- },
- {
- "name": "predict_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and predict_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_proba : array-like of shape (n_samples, n_classes)"
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and decision_function of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)"
- },
- {
- "name": "score_samples",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and score_samples of the final estimator.\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : ndarray of shape (n_samples,)"
- },
- {
- "name": "predict_log_proba",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and predict_log_proba of the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\ny_score : array-like of shape (n_samples, n_classes)"
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to transform. Must fulfill input requirements of first step of the pipeline."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and transform with the final estimator\n\nThis also works where final estimator is ``None``: all prior\ntransformations are applied.\n\nParameters\n----------\nX : iterable\n Data to transform. Must fulfill input requirements of first step\n of the pipeline.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_transformed_features)"
- },
- {
- "name": "_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "inverse_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "Xt",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data samples, where ``n_samples`` is the number of samples and ``n_features`` is the number of features. Must fulfill input requirements of last step of pipeline's ``inverse_transform`` method."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply inverse transformations in reverse order\n\nAll estimators in the pipeline must support ``inverse_transform``.\n\nParameters\n----------\nXt : array-like of shape (n_samples, n_transformed_features)\n Data samples, where ``n_samples`` is the number of samples and\n ``n_features`` is the number of features. Must fulfill\n input requirements of last step of pipeline's\n ``inverse_transform`` method.\n\nReturns\n-------\nXt : array-like of shape (n_samples, n_features)"
- },
- {
- "name": "_inverse_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data to predict on. Must fulfill input requirements of first step of the pipeline."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets used for scoring. Must fulfill label requirements for all steps of the pipeline."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, this argument is passed as ``sample_weight`` keyword argument to the ``score`` method of the final estimator."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply transforms, and score with the final estimator\n\nParameters\n----------\nX : iterable\n Data to predict on. Must fulfill input requirements of first step\n of the pipeline.\n\ny : iterable, default=None\n Targets used for scoring. Must fulfill label requirements for all\n steps of the pipeline.\n\nsample_weight : array-like, default=None\n If not None, this argument is passed as ``sample_weight`` keyword\n argument to the ``score`` method of the final estimator.\n\nReturns\n-------\nscore : float"
- },
- {
- "name": "classes_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "n_features_in_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_sk_visual_block_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Pipeline of transforms with a final estimator.\n\nSequentially apply a list of transforms and a final estimator.\nIntermediate steps of the pipeline must be 'transforms', that is, they\nmust implement fit and transform methods.\nThe final estimator only needs to implement fit.\nThe transformers in the pipeline can be cached using ``memory`` argument.\n\nThe purpose of the pipeline is to assemble several steps that can be\ncross-validated together while setting different parameters.\nFor this, it enables setting parameters of the various steps using their\nnames and the parameter name separated by a '__', as in the example below.\nA step's estimator may be replaced entirely by setting the parameter\nwith its name to another estimator, or a transformer removed by setting\nit to 'passthrough' or ``None``.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.5\n\nParameters\n----------\nsteps : list\n List of (name, transform) tuples (implementing fit/transform) that are\n chained, in the order in which they are chained, with the last object\n an estimator.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. 
Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nAttributes\n----------\nnamed_steps : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n Read-only attribute to access any step parameter by user given name.\n Keys are step names and values are steps parameters.\n\nSee Also\n--------\nmake_pipeline : Convenience function for simplified pipeline construction.\n\nExamples\n--------\n>>> from sklearn.svm import SVC\n>>> from sklearn.preprocessing import StandardScaler\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.pipeline import Pipeline\n>>> X, y = make_classification(random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(X, y,\n... random_state=0)\n>>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])\n>>> # The pipeline can be used as any other estimator\n>>> # and avoids leaking the test set into the train set\n>>> pipe.fit(X_train, y_train)\nPipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())])\n>>> pipe.score(X_test, y_test)\n0.88"
- },
- {
- "name": "FeatureUnion",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "transformer_list",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of transformer objects to be applied to the data. The first half of each tuple is the name of the transformer. The tranformer can be 'drop' for it to be ignored. .. versionchanged:: 0.22 Deprecated `None` as a transformer in favor of 'drop'."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None"
- },
- {
- "name": "transformer_weights",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multiplicative weights for features per transformer. Keys are transformer names, values the weights. Raises ValueError if key not present in ``transformer_list``."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "get_params",
- "decorators": [],
- "parameters": [
- {
- "name": "deep",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformer_list` of the\n`FeatureUnion`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : mapping of string to any\n Parameter names mapped to their values."
- },
- {
- "name": "set_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that\nyou can directly set the parameters of the estimators contained in\n`tranformer_list`.\n\nReturns\n-------\nself"
- },
- {
- "name": "_validate_transformers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_validate_transformer_weights",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_iter",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate (name, trans, weight) tuples excluding None and\n'drop' transformers."
- },
- {
- "name": "get_feature_names",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data, used to fit transformers."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets for supervised learning."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data, used to fit transformers.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : FeatureUnion\n This estimator"
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data to be transformed."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets for supervised learning."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\ny : array-like of shape (n_samples, n_outputs), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers."
- },
- {
- "name": "_log_message",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_parallel_func",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Runs func in parallel on X and y"
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data to be transformed."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : iterable or array-like, depending on transformers\n Input data to be transformed.\n\nReturns\n-------\nX_t : array-like or sparse matrix of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers."
- },
- {
- "name": "_hstack",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_update_transformer_list",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "n_features_in_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_sk_visual_block_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer,\nor removed by setting to 'drop'.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\ntransformer_list : list of (string, transformer) tuples\n List of transformer objects to be applied to the data. The first\n half of each tuple is the name of the transformer. The tranformer can\n be 'drop' for it to be ignored.\n\n .. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer.\n Keys are transformer names, values the weights.\n Raises ValueError if key not present in ``transformer_list``.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nSee Also\n--------\nmake_union : Convenience function for simplified feature union\n construction.\n\nExamples\n--------\n>>> from sklearn.pipeline import FeatureUnion\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> union = FeatureUnion([(\"pca\", PCA(n_components=1)),\n... 
(\"svd\", TruncatedSVD(n_components=2))])\n>>> X = [[0., 1., 3], [2., 2., 5]]\n>>> union.fit_transform(X)\narray([[ 1.5 , 3.0..., 0.8...],\n [-1.5 , 5.7..., -0.4...]])"
- }
- ],
- "functions": [
- {
- "name": "_name_estimators",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate names for estimators."
- },
- {
- "name": "make_pipeline",
- "decorators": [],
- "parameters": [
- {
- "name": "*steps",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- },
- {
- "name": "memory",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used to cache the fitted transformers of the pipeline. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each step will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Construct a Pipeline from the given estimators.\n\nThis is a shorthand for the Pipeline constructor; it does not require, and\ndoes not permit, naming the estimators. Instead, their names will be set\nto the lowercase of their types automatically.\n\nParameters\n----------\n*steps : list of estimators.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the fitted transformers of the pipeline. By default,\n no caching is performed. If a string is given, it is the path to\n the caching directory. Enabling caching triggers a clone of\n the transformers before fitting. Therefore, the transformer\n instance given to the pipeline cannot be inspected\n directly. Use the attribute ``named_steps`` or ``steps`` to\n inspect estimators within the pipeline. Caching the\n transformers is advantageous when fitting is time consuming.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each step will be printed as it\n is completed.\n\nSee Also\n--------\nPipeline : Class for creating a pipeline of transforms with a final\n estimator.\n\nExamples\n--------\n>>> from sklearn.naive_bayes import GaussianNB\n>>> from sklearn.preprocessing import StandardScaler\n>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))\nPipeline(steps=[('standardscaler', StandardScaler()),\n ('gaussiannb', GaussianNB())])\n\nReturns\n-------\np : Pipeline"
- },
- {
- "name": "_transform_one",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit_transform_one",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\nwith the fitted transformer. If ``weight`` is not ``None``, the result will\nbe multiplied by ``weight``."
- },
- {
- "name": "_fit_one",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fits ``transformer`` to ``X`` and ``y``."
- },
- {
- "name": "make_union",
- "decorators": [],
- "parameters": [
- {
- "name": "*transformers",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None"
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Construct a FeatureUnion from the given transformers.\n\nThis is a shorthand for the FeatureUnion constructor; it does not require,\nand does not permit, naming the transformers. Instead, they will be given\nnames automatically based on their types. It also does not allow weighting.\n\nParameters\n----------\n*transformers : list of estimators\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nf : FeatureUnion\n\nSee Also\n--------\nFeatureUnion : Class for concatenating the results of multiple transformer\n objects.\n\nExamples\n--------\n>>> from sklearn.decomposition import PCA, TruncatedSVD\n>>> from sklearn.pipeline import make_union\n>>> make_union(PCA(), TruncatedSVD())\n FeatureUnion(transformer_list=[('pca', PCA()),\n ('truncatedsvd', TruncatedSVD())])"
- }
- ]
- },
- {
- "name": "sklearn.random_projection",
- "imports": [
- "import warnings",
- "from abc import ABCMeta",
- "from abc import abstractmethod",
- "import numpy as np",
- "import scipy.sparse as sp",
- "from base import BaseEstimator",
- "from base import TransformerMixin",
- "from utils import check_random_state",
- "from utils.extmath import safe_sparse_dot",
- "from utils.random import sample_without_replacement",
- "from utils.validation import check_array",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _deprecate_positional_args",
- "from exceptions import DataDimensionalityWarning"
- ],
- "classes": [
- {
- "name": "BaseRandomProjection",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_make_random_matrix",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space."
- },
- {
- "name": "n_features",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the original source space."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training set: only the shape is used to find optimal random matrix dimensions based on the theory referenced in the afore mentioned papers."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Ignored"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Generate a sparse random projection matrix.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n Training set: only the shape is used to find optimal random\n matrix dimensions based on the theory referenced in the\n afore mentioned papers.\n\ny\n Ignored\n\nReturns\n-------\nself"
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input data to project into a smaller dimensional space."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Project the data by using matrix product with the random matrix\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input data to project into a smaller dimensional space.\n\nReturns\n-------\nX_new : {ndarray, sparse matrix} of shape (n_samples, n_components)\n Projected array."
- }
- ],
- "docstring": "Base class for random projections.\n\nWarning: This class should not be used directly.\nUse derived classes instead."
- },
- {
- "name": "GaussianRandomProjection",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "Union[Literal['auto'], int]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset."
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when `n_components` is set to 'auto'. The value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_make_random_matrix",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space."
- },
- {
- "name": "n_features",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the original source space."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate the random projection matrix.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format."
- }
- ],
- "docstring": "Reduce dimensionality through Gaussian random projection.\n\nThe components of the random matrix are drawn from N(0, 1 / n_components).\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when `n_components` is set to\n 'auto'. The value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : ndarray of shape (n_components, n_features)\n Random matrix used for the projection.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import GaussianRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = GaussianRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n\nSee Also\n--------\nSparseRandomProjection"
- },
- {
- "name": "SparseRandomProjection",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "Union[Literal['auto'], int]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space. n_components can be automatically adjusted according to the number of samples in the dataset and the bound given by the Johnson-Lindenstrauss lemma. In that case the quality of the embedding is controlled by the ``eps`` parameter. It should be noted that Johnson-Lindenstrauss lemma can yield very conservative estimated of the required number of components as it makes no assumption on the structure of the dataset."
- },
- {
- "name": "density",
- "type": "Union[Literal['auto'], float]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Ratio in the range (0, 1] of non-zero component in the random projection matrix. If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001."
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter to control the quality of the embedding according to the Johnson-Lindenstrauss lemma when n_components is set to 'auto'. This value should be strictly positive. Smaller values lead to better embedding and higher number of dimensions (n_components) in the target projection space."
- },
- {
- "name": "dense_output",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, ensure that the output of the random projection is a dense numpy array even if the input and random projection matrix are both sparse. In practice, if the number of components is small the number of zero components in the projected data will be very small and it will be more CPU and memory efficient to use a dense representation. If False, the projected data uses a sparse representation if the input is sparse."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the pseudo random number generator used to generate the projection matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_make_random_matrix",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the original source space."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate the random projection matrix\n\nParameters\n----------\nn_components : int\n Dimensionality of the target projection space.\n\nn_features : int\n Dimensionality of the original source space.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated random matrix. Sparse matrix will be of CSR format."
- }
- ],
- "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. 
This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\ndensity_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(100, 10000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(100, 3947)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0100...\n\nSee Also\n--------\nGaussianRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. 
Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf"
- }
- ],
- "functions": [
- {
- "name": "johnson_lindenstrauss_min_dim",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "Union[ArrayLike, int]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of samples that should be a integer greater than 0. If an array is given, it will compute a safe number of components array-wise."
- },
- {
- "name": "eps",
- "type": "Union[NDArray, float]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum distortion rate in the range (0,1 ) as defined by the Johnson-Lindenstrauss lemma. If an array is given, it will compute a safe number of components array-wise."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like of int\n Number of samples that should be a integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float, default=0.1\n Maximum distortion rate in the range (0,1 ) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\nExamples\n--------\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([ 663, 11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894, 9868, 11841])\n\nReferences\n----------\n\n.. 
[1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. [2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654"
- },
- {
- "name": "_check_density",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Factorize density check according to Li et al."
- },
- {
- "name": "_check_input_size",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Factorize argument checking for random matrix generation."
- },
- {
- "name": "_gaussian_random_matrix",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space."
- },
- {
- "name": "n_features",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the original source space."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\nSee Also\n--------\nGaussianRandomProjection"
- },
- {
- "name": "_sparse_random_matrix",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the target projection space."
- },
- {
- "name": "n_features",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Dimensionality of the original source space."
- },
- {
- "name": "density",
- "type": "Union[Literal['auto'], float]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Ratio of non-zero component in the random projection matrix in the range `(0, 1]` If density = 'auto', the value is set to the minimum density as recommended by Ping Li et al.: 1 / sqrt(n_features). Use density = 1 / 3.0 if you want to reproduce the results from Achlioptas, 2001."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the pseudo random number generator used to generate the matrix at fit time. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\ndensity : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\nSee Also\n--------\nSparseRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf"
- }
- ]
- },
- {
- "name": "sklearn.setup",
- "imports": [
- "import sys",
- "import os",
- "from sklearn._build_utils import cythonize_extensions",
- "from numpy.distutils.misc_util import Configuration",
- "import numpy",
- "from numpy.distutils.core import setup"
- ],
- "classes": [],
- "functions": [
- {
- "name": "configuration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn._config",
- "imports": [
- "import os",
- "from contextlib import contextmanager as contextmanager"
- ],
- "classes": [],
- "functions": [
- {
- "name": "get_config",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Retrieve current values for configuration set by :func:`set_config`\n\nReturns\n-------\nconfig : dict\n Keys are parameter names that can be passed to :func:`set_config`.\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nset_config : Set global scikit-learn configuration."
- },
- {
- "name": "set_config",
- "decorators": [],
- "parameters": [
- {
- "name": "assume_finite",
- "type": "bool",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False. .. versionadded:: 0.19"
- },
- {
- "name": "working_memory",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024. .. versionadded:: 0.20"
- },
- {
- "name": "print_changed_only",
- "type": "bool",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. For example, ``print(SVC())`` while True will only print 'SVC()' while the default behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters. .. versionadded:: 0.21"
- },
- {
- "name": "display",
- "type": "Literal['text', 'diagram']",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Set global scikit-learn configuration\n\n.. versionadded:: 0.19\n\nParameters\n----------\nassume_finite : bool, default=None\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\n .. versionadded:: 0.19\n\nworking_memory : int, default=None\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\n .. versionadded:: 0.20\n\nprint_changed_only : bool, default=None\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()' while the default\n behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with\n all the non-changed parameters.\n\n .. versionadded:: 0.21\n\ndisplay : {'text', 'diagram'}, default=None\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nSee Also\n--------\nconfig_context : Context manager for global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration."
- },
- {
- "name": "config_context",
- "decorators": [],
- "parameters": [
- {
- "name": "assume_finite",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, validation for finiteness will be skipped, saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. Global default: False."
- },
- {
- "name": "working_memory",
- "type": "int",
- "hasDefault": true,
- "default": "1024",
- "limitation": null,
- "ignored": false,
- "docstring": "If set, scikit-learn will attempt to limit the size of temporary arrays to this number of MiB (per job when parallelised), often saving both computation time and memory on expensive operations that can be performed in chunks. Global default: 1024."
- },
- {
- "name": "print_changed_only",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, only the parameters that were set to non-default values will be printed when printing an estimator. For example, ``print(SVC())`` while True will only print 'SVC()', but would print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters when False. Default is True. .. versionchanged:: 0.23 Default changed from False to True."
- },
- {
- "name": "display",
- "type": "Literal['text', 'diagram']",
- "hasDefault": true,
- "default": "'text'",
- "limitation": null,
- "ignored": false,
- "docstring": "If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. Default is 'text'. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Context manager for global scikit-learn configuration\n\nParameters\n----------\nassume_finite : bool, default=False\n If True, validation for finiteness will be skipped,\n saving time, but leading to potential crashes. If\n False, validation for finiteness will be performed,\n avoiding error. Global default: False.\n\nworking_memory : int, default=1024\n If set, scikit-learn will attempt to limit the size of temporary arrays\n to this number of MiB (per job when parallelised), often saving both\n computation time and memory on expensive operations that can be\n performed in chunks. Global default: 1024.\n\nprint_changed_only : bool, default=True\n If True, only the parameters that were set to non-default\n values will be printed when printing an estimator. For example,\n ``print(SVC())`` while True will only print 'SVC()', but would print\n 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\n when False. Default is True.\n\n .. versionchanged:: 0.23\n Default changed from False to True.\n\ndisplay : {'text', 'diagram'}, default='text'\n If 'diagram', estimators will be displayed as a diagram in a Jupyter\n lab or notebook context. If 'text', estimators will be displayed as\n text. Default is 'text'.\n\n .. versionadded:: 0.23\n\nNotes\n-----\nAll settings, not just those presently modified, will be returned to\ntheir previous values when the context manager is exited. This is not\nthread-safe.\n\nExamples\n--------\n>>> import sklearn\n>>> from sklearn.utils.validation import assert_all_finite\n>>> with sklearn.config_context(assume_finite=True):\n... assert_all_finite([float('nan')])\n>>> with sklearn.config_context(assume_finite=True):\n... with sklearn.config_context(assume_finite=False):\n... 
assert_all_finite([float('nan')])\nTraceback (most recent call last):\n...\nValueError: Input contains NaN, ...\n\nSee Also\n--------\nset_config : Set global scikit-learn configuration.\nget_config : Retrieve current values of the global configuration."
- }
- ]
- },
- {
- "name": "sklearn._distributor_init",
- "imports": [
- "import os",
- "import os.path as op",
- "from ctypes import WinDLL"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn._min_dependencies",
- "imports": [
- "import platform",
- "import argparse"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn",
- "imports": [
- "import sys",
- "import logging",
- "import os",
- "import random",
- "from _config import get_config",
- "from _config import set_config",
- "from _config import config_context",
- "from None import _distributor_init",
- "from None import __check_build",
- "from base import clone",
- "from utils._show_versions import show_versions",
- "import numpy as np"
- ],
- "classes": [],
- "functions": [
- {
- "name": "setup_module",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fixture for the tests to assure globally controllable seeding of RNGs"
- }
- ]
- },
- {
- "name": "sklearn.cluster.setup",
- "imports": [
- "import os",
- "import numpy",
- "from numpy.distutils.misc_util import Configuration",
- "from numpy.distutils.core import setup"
- ],
- "classes": [],
- "functions": [
- {
- "name": "configuration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster._affinity_propagation",
- "imports": [
- "import numpy as np",
- "import warnings",
- "from exceptions import ConvergenceWarning",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from utils import as_float_array",
- "from utils import check_random_state",
- "from utils.deprecation import deprecated",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _deprecate_positional_args",
- "from metrics import euclidean_distances",
- "from metrics import pairwise_distances_argmin",
- "from _config import config_context"
- ],
- "classes": [
- {
- "name": "AffinityPropagation",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "damping",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Damping factor (between 0.5 and 1) is the extent to which the current value is maintained relative to incoming values (weighted 1 - damping). This in order to avoid numerical oscillations when updating these values (messages)."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "200",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations."
- },
- {
- "name": "convergence_iter",
- "type": "int",
- "hasDefault": true,
- "default": "15",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Make a copy of input data."
- },
- {
- "name": "preference",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, ie of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities."
- },
- {
- "name": "affinity",
- "type": "Literal['euclidean', 'precomputed']",
- "hasDefault": true,
- "default": "'euclidean'",
- "limitation": null,
- "ignored": false,
- "docstring": "Which affinity to use. At the moment 'precomputed' and ``euclidean`` are supported. 'euclidean' uses the negative squared euclidean distance between points."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to be verbose."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to predict. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict. If a sparse matrix is provided, it will be\n converted into a sparse ``csr_matrix``.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels."
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse feature matrix is provided, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit the clustering from features or affinity matrix, and return\ncluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse feature matrix\n is provided, it will be converted into a sparse ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels."
- }
- ],
- "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndamping : float, default=0.5\n Damping factor (between 0.5 and 1) is the extent to\n which the current value is maintained relative to\n incoming values (weighted 1 - damping). This in order\n to avoid numerical oscillations when updating these\n values (messages).\n\nmax_iter : int, default=200\n Maximum number of iterations.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\ncopy : bool, default=True\n Make a copy of input data.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number\n of exemplars, ie of clusters, is influenced by the input\n preferences value. If the preferences are not passed as arguments,\n they will be set to the median of the input similarities.\n\naffinity : {'euclidean', 'precomputed'}, default='euclidean'\n Which affinity to use. At the moment 'precomputed' and\n ``euclidean`` are supported. 'euclidean' uses the\n negative squared euclidean distance between points.\n\nverbose : bool, default=False\n Whether to be verbose.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. 
versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nAttributes\n----------\ncluster_centers_indices_ : ndarray of shape (n_clusters,)\n Indices of cluster centers.\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Cluster centers (if affinity != ``precomputed``).\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\naffinity_matrix_ : ndarray of shape (n_samples, n_samples)\n Stores the affinity matrix used in ``fit``.\n\nn_iter_ : int\n Number of iterations taken to converge.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nThe algorithmic complexity of affinity propagation is quadratic\nin the number of points.\n\nWhen ``fit`` does not converge, ``cluster_centers_`` becomes an empty\narray and all training samples will be labelled as ``-1``. In addition,\n``predict`` will then label every sample as ``-1``.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, ``fit`` will result in\na single cluster center and label ``0`` for every sample. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\n\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007\n\nExamples\n--------\n>>> from sklearn.cluster import AffinityPropagation\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 4], [4, 0]])\n>>> clustering = AffinityPropagation(random_state=5).fit(X)\n>>> clustering\nAffinityPropagation(random_state=5)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])\n>>> clustering.predict([[0, 0], [4, 4]])\narray([0, 1])\n>>> clustering.cluster_centers_\narray([[1, 2],\n [4, 2]])"
- }
- ],
- "functions": [
- {
- "name": "_equal_similarities_and_preferences",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "affinity_propagation",
- "decorators": [],
- "parameters": [
- {
- "name": "S",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Matrix of similarities between points."
- },
- {
- "name": "preference",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Preferences for each point - points with larger values of preferences are more likely to be chosen as exemplars. The number of exemplars, i.e. of clusters, is influenced by the input preferences value. If the preferences are not passed as arguments, they will be set to the median of the input similarities (resulting in a moderate number of clusters). For a smaller amount of clusters, this can be set to the minimum value of the similarities."
- },
- {
- "name": "convergence_iter",
- "type": "int",
- "hasDefault": true,
- "default": "15",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of iterations with no change in the number of estimated clusters that stops the convergence."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "200",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations"
- },
- {
- "name": "damping",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Damping factor between 0.5 and 1."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If copy is False, the affinity matrix is modified inplace by the algorithm, for memory efficiency."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "The verbosity level."
- },
- {
- "name": "return_n_iter",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to return the number of iterations."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Pseudo-random number generator to control the starting state. Use an int for reproducible results across function calls. See the :term:`Glossary `. .. versionadded:: 0.23 this parameter was previously hardcoded as 0."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform Affinity Propagation Clustering of data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nS : array-like of shape (n_samples, n_samples)\n Matrix of similarities between points.\n\npreference : array-like of shape (n_samples,) or float, default=None\n Preferences for each point - points with larger values of\n preferences are more likely to be chosen as exemplars. The number of\n exemplars, i.e. of clusters, is influenced by the input preferences\n value. If the preferences are not passed as arguments, they will be\n set to the median of the input similarities (resulting in a moderate\n number of clusters). For a smaller amount of clusters, this can be set\n to the minimum value of the similarities.\n\nconvergence_iter : int, default=15\n Number of iterations with no change in the number\n of estimated clusters that stops the convergence.\n\nmax_iter : int, default=200\n Maximum number of iterations\n\ndamping : float, default=0.5\n Damping factor between 0.5 and 1.\n\ncopy : bool, default=True\n If copy is False, the affinity matrix is modified inplace by the\n algorithm, for memory efficiency.\n\nverbose : bool, default=False\n The verbosity level.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nrandom_state : int, RandomState instance or None, default=0\n Pseudo-random number generator to control the starting state.\n Use an int for reproducible results across function calls.\n See the :term:`Glossary `.\n\n .. versionadded:: 0.23\n this parameter was previously hardcoded as 0.\n\nReturns\n-------\n\ncluster_centers_indices : ndarray of shape (n_clusters,)\n Index of clusters centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nn_iter : int\n Number of iterations run. 
Returned only if `return_n_iter` is\n set to True.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_affinity_propagation.py\n`.\n\nWhen the algorithm does not converge, it returns an empty array as\n``cluster_center_indices`` and ``-1`` as label for each training sample.\n\nWhen all training samples have equal similarities and equal preferences,\nthe assignment of cluster centers and labels depends on the preference.\nIf the preference is smaller than the similarities, a single cluster center\nand label ``0`` for every sample will be returned. Otherwise, every\ntraining sample becomes its own cluster center and is assigned a unique\nlabel.\n\nReferences\n----------\nBrendan J. Frey and Delbert Dueck, \"Clustering by Passing Messages\nBetween Data Points\", Science Feb. 2007"
- }
- ]
- },
- {
- "name": "sklearn.cluster._agglomerative",
- "imports": [
- "import warnings",
- "from heapq import heapify",
- "from heapq import heappop",
- "from heapq import heappush",
- "from heapq import heappushpop",
- "import numpy as np",
- "from scipy import sparse",
- "from scipy.sparse.csgraph import connected_components",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from metrics.pairwise import paired_distances",
- "from metrics.pairwise import pairwise_distances",
- "from neighbors import DistanceMetric",
- "from neighbors._dist_metrics import METRIC_MAPPING",
- "from utils import check_array",
- "from utils._fast_dict import IntFloatDict",
- "from utils.fixes import _astype_copy_false",
- "from utils.validation import _deprecate_positional_args",
- "from utils.validation import check_memory",
- "from None import _hierarchical_fast as _hierarchical",
- "from _feature_agglomeration import AgglomerationTransform",
- "from scipy.sparse.csgraph import minimum_spanning_tree",
- "from scipy.cluster import hierarchy"
- ],
- "classes": [
- {
- "name": "AgglomerativeClustering",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``."
- },
- {
- "name": "affinity",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'euclidean'",
- "limitation": null,
- "ignored": false,
- "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or \"precomputed\". If linkage is \"ward\", only \"euclidean\" is accepted. If \"precomputed\", a distance matrix (instead of a similarity matrix) is needed as input for the fit method."
- },
- {
- "name": "memory",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory."
- },
- {
- "name": "connectivity",
- "type": "Union[Callable, ArrayLike]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is ``None``, i.e, the hierarchical clustering algorithm is unstructured."
- },
- {
- "name": "compute_full_tree",
- "type": "Union[Literal['auto'], bool]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Stop early the construction of the tree at ``n_clusters``. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`."
- },
- {
- "name": "linkage",
- "type": "Literal['ward', 'complete', 'average', 'single']",
- "hasDefault": true,
- "default": "'ward'",
- "limitation": null,
- "ignored": false,
- "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of observation. The algorithm will merge the pairs of cluster that minimize this criterion. - 'ward' minimizes the variance of the clusters being merged. - 'average' uses the average of the distances of each observation of the two sets. - 'complete' or 'maximum' linkage uses the maximum distances between all observations of the two sets. - 'single' uses the minimum of the distances between all observations of the two sets. .. versionadded:: 0.20 Added the 'single' option"
- },
- {
- "name": "distance_threshold",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21"
- },
- {
- "name": "compute_distances",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the hierarchical clustering from features, or distance matrix.\n\nParameters\n----------\nX : array-like, shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself"
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or distances between instances if ``affinity='precomputed'``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit the hierarchical clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``affinity='precomputed'``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels."
- }
- ],
- "docstring": "Agglomerative Clustering\n\nRecursively merges the pair of clusters that minimally increases\na given linkage distance.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or None, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or \"precomputed\".\n If linkage is \"ward\", only \"euclidean\" is accepted.\n If \"precomputed\", a distance matrix (instead of a similarity matrix)\n is needed as input for the fit method.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each sample the neighboring\n samples following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is ``None``, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at ``n_clusters``. This is\n useful to decrease computation time if the number of clusters is not\n small compared to the number of samples. This option is useful only\n when specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of observation. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - 'ward' minimizes the variance of the clusters being merged.\n - 'average' uses the average of the distances of each observation of\n the two sets.\n - 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n - 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n .. versionadded:: 0.20\n Added the 'single' option\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : ndarray of shape (n_samples)\n cluster labels for each point\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. 
versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_samples-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> from sklearn.cluster import AgglomerativeClustering\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 4], [4, 0]])\n>>> clustering = AgglomerativeClustering().fit(X)\n>>> clustering\nAgglomerativeClustering()\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])"
- },
- {
- "name": "FeatureAgglomeration",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to find. It must be ``None`` if ``distance_threshold`` is not ``None``."
- },
- {
- "name": "affinity",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'euclidean'",
- "limitation": null,
- "ignored": false,
- "docstring": "Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\", \"manhattan\", \"cosine\", or 'precomputed'. If linkage is \"ward\", only \"euclidean\" is accepted."
- },
- {
- "name": "memory",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used to cache the output of the computation of the tree. By default, no caching is done. If a string is given, it is the path to the caching directory."
- },
- {
- "name": "connectivity",
- "type": "Union[Callable, ArrayLike]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Connectivity matrix. Defines for each feature the neighboring features following a given structure of the data. This can be a connectivity matrix itself or a callable that transforms the data into a connectivity matrix, such as derived from kneighbors_graph. Default is None, i.e, the hierarchical clustering algorithm is unstructured."
- },
- {
- "name": "compute_full_tree",
- "type": "Union[Literal['auto'], bool]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of features. This option is useful only when specifying a connectivity matrix. Note also that when varying the number of clusters and using caching, it may be advantageous to compute the full tree. It must be ``True`` if ``distance_threshold`` is not ``None``. By default `compute_full_tree` is \"auto\", which is equivalent to `True` when `distance_threshold` is not `None` or that `n_clusters` is inferior to the maximum between 100 or `0.02 * n_samples`. Otherwise, \"auto\" is equivalent to `False`."
- },
- {
- "name": "linkage",
- "type": "Literal['ward', 'complete', 'average', 'single']",
- "hasDefault": true,
- "default": "'ward'",
- "limitation": null,
- "ignored": false,
- "docstring": "Which linkage criterion to use. The linkage criterion determines which distance to use between sets of features. The algorithm will merge the pairs of cluster that minimize this criterion. - ward minimizes the variance of the clusters being merged. - average uses the average of the distances of each feature of the two sets. - complete or maximum linkage uses the maximum distances between all features of the two sets. - single uses the minimum of the distances between all observations of the two sets."
- },
- {
- "name": "pooling_func",
- "type": "Callable",
- "hasDefault": true,
- "default": "np",
- "limitation": null,
- "ignored": false,
- "docstring": "This combines the values of agglomerated features into a single value, and should accept an array of shape [M, N] and the keyword argument `axis=1`, and reduce it to an array of size [M]."
- },
- {
- "name": "distance_threshold",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The linkage distance threshold above which, clusters will not be merged. If not ``None``, ``n_clusters`` must be ``None`` and ``compute_full_tree`` must be ``True``. .. versionadded:: 0.21"
- },
- {
- "name": "compute_distances",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Computes distances between clusters even if `distance_threshold` is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data"
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the hierarchical clustering on the data\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data\n\ny : Ignored\n\nReturns\n-------\nself"
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Agglomerate features.\n\nSimilar to AgglomerativeClustering, but recursively merges features\ninstead of samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=2\n The number of clusters to find. It must be ``None`` if\n ``distance_threshold`` is not ``None``.\n\naffinity : str or callable, default='euclidean'\n Metric used to compute the linkage. Can be \"euclidean\", \"l1\", \"l2\",\n \"manhattan\", \"cosine\", or 'precomputed'.\n If linkage is \"ward\", only \"euclidean\" is accepted.\n\nmemory : str or object with the joblib.Memory interface, default=None\n Used to cache the output of the computation of the tree.\n By default, no caching is done. If a string is given, it is the\n path to the caching directory.\n\nconnectivity : array-like or callable, default=None\n Connectivity matrix. Defines for each feature the neighboring\n features following a given structure of the data.\n This can be a connectivity matrix itself or a callable that transforms\n the data into a connectivity matrix, such as derived from\n kneighbors_graph. Default is None, i.e, the\n hierarchical clustering algorithm is unstructured.\n\ncompute_full_tree : 'auto' or bool, default='auto'\n Stop early the construction of the tree at n_clusters. This is useful\n to decrease computation time if the number of clusters is not small\n compared to the number of features. This option is useful only when\n specifying a connectivity matrix. Note also that when varying the\n number of clusters and using caching, it may be advantageous to compute\n the full tree. It must be ``True`` if ``distance_threshold`` is not\n ``None``. 
By default `compute_full_tree` is \"auto\", which is equivalent\n to `True` when `distance_threshold` is not `None` or that `n_clusters`\n is inferior to the maximum between 100 or `0.02 * n_samples`.\n Otherwise, \"auto\" is equivalent to `False`.\n\nlinkage : {'ward', 'complete', 'average', 'single'}, default='ward'\n Which linkage criterion to use. The linkage criterion determines which\n distance to use between sets of features. The algorithm will merge\n the pairs of cluster that minimize this criterion.\n\n - ward minimizes the variance of the clusters being merged.\n - average uses the average of the distances of each feature of\n the two sets.\n - complete or maximum linkage uses the maximum distances between\n all features of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\npooling_func : callable, default=np.mean\n This combines the values of agglomerated features into a single\n value, and should accept an array of shape [M, N] and the keyword\n argument `axis=1`, and reduce it to an array of size [M].\n\ndistance_threshold : float, default=None\n The linkage distance threshold above which, clusters will not be\n merged. If not ``None``, ``n_clusters`` must be ``None`` and\n ``compute_full_tree`` must be ``True``.\n\n .. versionadded:: 0.21\n\ncompute_distances : bool, default=False\n Computes distances between clusters even if `distance_threshold` is not\n used. This can be used to make dendrogram visualization, but introduces\n a computational and memory overhead.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nn_clusters_ : int\n The number of clusters found by the algorithm. 
If\n ``distance_threshold=None``, it will be equal to the given\n ``n_clusters``.\n\nlabels_ : array-like of (n_features,)\n cluster labels for each feature.\n\nn_leaves_ : int\n Number of leaves in the hierarchical tree.\n\nn_connected_components_ : int\n The estimated number of connected components in the graph.\n\n .. versionadded:: 0.21\n ``n_connected_components_`` was added to replace ``n_components_``.\n\nchildren_ : array-like of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_features`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_features` is a non-leaf\n node and has children `children_[i - n_features]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_features + i`\n\ndistances_ : array-like of shape (n_nodes-1,)\n Distances between nodes in the corresponding place in `children_`.\n Only computed if `distance_threshold` is used or `compute_distances`\n is set to `True`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn import datasets, cluster\n>>> digits = datasets.load_digits()\n>>> images = digits.images\n>>> X = np.reshape(images, (len(images), -1))\n>>> agglo = cluster.FeatureAgglomeration(n_clusters=32)\n>>> agglo.fit(X)\nFeatureAgglomeration(n_clusters=32)\n>>> X_reduced = agglo.transform(X)\n>>> X_reduced.shape\n(1797, 32)"
- }
- ],
- "functions": [
- {
- "name": "_fix_connectivity",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fixes the connectivity matrix\n\n - copies it\n - makes it symmetric\n - converts it to LIL if necessary\n - completes it if necessary"
- },
- {
- "name": "_single_linkage_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform single linkage clustering on sparse data via the minimum\nspanning tree from scipy.sparse.csgraph, then using union-find to label.\nThe parent array is then generated by walking through the tree."
- },
- {
- "name": "ward_tree",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "feature matrix representing n_samples samples to be clustered"
- },
- {
- "name": "connectivity",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix."
- },
- {
- "name": "return_distance",
- "type": "bool",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, return the distance between the clusters."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Ward clustering based on a Feature matrix.\n\nRecursively merges the pair of clusters that minimally increases\nwithin-cluster variance.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nreturn_distance : bool, default=None\n If True, return the distance between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree\n\nparents : ndarray of shape (n_nodes,) or None\n The parent of each node. 
Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Only returned if return_distance is set to True (for compatibility).\n The distances between the centers of the nodes. `distances[i]`\n corresponds to a weighted euclidean distance between\n the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to\n leaves of the tree, then `distances[i]` is their unweighted euclidean\n distance. Distances are updated in the following way\n (from scipy.hierarchy.linkage):\n\n The new entry :math:`d(u,v)` is computed as follows,\n\n .. math::\n\n d(u,v) = \\sqrt{\\frac{|v|+|s|}\n {T}d(v,s)^2\n + \\frac{|v|+|t|}\n {T}d(v,t)^2\n - \\frac{|v|}\n {T}d(s,t)^2}\n\n where :math:`u` is the newly joined cluster consisting of\n clusters :math:`s` and :math:`t`, :math:`v` is an unused\n cluster in the forest, :math:`T=|v|+|s|+|t|`, and\n :math:`|*|` is the cardinality of its argument. This is also\n known as the incremental algorithm."
- },
- {
- "name": "linkage_tree",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "feature matrix representing n_samples samples to be clustered"
- },
- {
- "name": "connectivity",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "connectivity matrix. Defines for each sample the neighboring samples following a given structure of the data. The matrix is assumed to be symmetric and only the upper triangular half is used. Default is None, i.e, the Ward algorithm is unstructured."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Stop early the construction of the tree at n_clusters. This is useful to decrease computation time if the number of clusters is not small compared to the number of samples. In this case, the complete tree is not computed, thus the 'children' output is of limited use, and the 'parents' output should rather be used. This option is valid only when specifying a connectivity matrix."
- },
- {
- "name": "linkage",
- "type": "Literal[\"average\", \"complete\", \"single\"]",
- "hasDefault": true,
- "default": "\"complete\"",
- "limitation": null,
- "ignored": false,
- "docstring": "Which linkage criteria to use. The linkage criterion determines which distance to use between sets of observation. - average uses the average of the distances of each observation of the two sets - complete or maximum linkage uses the maximum distances between all observations of the two sets. - single uses the minimum of the distances between all observations of the two sets."
- },
- {
- "name": "affinity",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "\"euclidean\"",
- "limitation": null,
- "ignored": false,
- "docstring": "which metric to use. Can be \"euclidean\", \"manhattan\", or any distance know to paired distance (see metric.pairwise)"
- },
- {
- "name": "return_distance",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "whether or not to return the distances between the clusters."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Linkage agglomerative clustering based on a Feature matrix.\n\nThe inertia matrix uses a Heapq-based representation.\n\nThis is the structured version, that takes into account some topological\nstructure between samples.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n feature matrix representing n_samples samples to be clustered\n\nconnectivity : sparse matrix, default=None\n connectivity matrix. Defines for each sample the neighboring samples\n following a given structure of the data. The matrix is assumed to\n be symmetric and only the upper triangular half is used.\n Default is None, i.e, the Ward algorithm is unstructured.\n\nn_clusters : int, default=None\n Stop early the construction of the tree at n_clusters. This is\n useful to decrease computation time if the number of clusters is\n not small compared to the number of samples. In this case, the\n complete tree is not computed, thus the 'children' output is of\n limited use, and the 'parents' output should rather be used.\n This option is valid only when specifying a connectivity matrix.\n\nlinkage : {\"average\", \"complete\", \"single\"}, default=\"complete\"\n Which linkage criteria to use. The linkage criterion determines which\n distance to use between sets of observation.\n - average uses the average of the distances of each observation of\n the two sets\n - complete or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - single uses the minimum of the distances between all observations\n of the two sets.\n\naffinity : str or callable, default=\"euclidean\".\n which metric to use. 
Can be \"euclidean\", \"manhattan\", or any\n distance know to paired distance (see metric.pairwise)\n\nreturn_distance : bool, default=False\n whether or not to return the distances between the clusters.\n\nReturns\n-------\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_connected_components : int\n The number of connected components in the graph.\n\nn_leaves : int\n The number of leaves in the tree.\n\nparents : ndarray of shape (n_nodes, ) or None\n The parent of each node. Only returned when a connectivity matrix\n is specified, elsewhere 'None' is returned.\n\ndistances : ndarray of shape (n_nodes-1,)\n Returned when return_distance is set to True.\n\n distances[i] refers to the distance between children[i][0] and\n children[i][1] when they are merged.\n\nSee Also\n--------\nward_tree : Hierarchical clustering with ward linkage."
- },
- {
- "name": "_complete_linkage",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_average_linkage",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_single_linkage",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_hc_cut",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "Union[NDArray, int]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to form."
- },
- {
- "name": "children",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The children of each non-leaf node. Values less than `n_samples` correspond to leaves of the tree which are the original samples. A node `i` greater than or equal to `n_samples` is a non-leaf node and has children `children_[i - n_samples]`. Alternatively at the i-th iteration, children[i][0] and children[i][1] are merged to form node `n_samples + i`"
- },
- {
- "name": "n_leaves",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of leaves of the tree."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Function cutting the ward tree for a given number of clusters.\n\nParameters\n----------\nn_clusters : int or ndarray\n The number of clusters to form.\n\nchildren : ndarray of shape (n_nodes-1, 2)\n The children of each non-leaf node. Values less than `n_samples`\n correspond to leaves of the tree which are the original samples.\n A node `i` greater than or equal to `n_samples` is a non-leaf\n node and has children `children_[i - n_samples]`. Alternatively\n at the i-th iteration, children[i][0] and children[i][1]\n are merged to form node `n_samples + i`\n\nn_leaves : int\n Number of leaves of the tree.\n\nReturns\n-------\nlabels : array [n_samples]\n cluster labels for each point"
- }
- ]
- },
- {
- "name": "sklearn.cluster._bicluster",
- "imports": [
- "from abc import ABCMeta",
- "from abc import abstractmethod",
- "import warnings",
- "import numpy as np",
- "from scipy.linalg import norm",
- "from scipy.sparse import dia_matrix",
- "from scipy.sparse import issparse",
- "from scipy.sparse.linalg import eigsh",
- "from scipy.sparse.linalg import svds",
- "from None import KMeans",
- "from None import MiniBatchKMeans",
- "from base import BaseEstimator",
- "from base import BiclusterMixin",
- "from utils import check_random_state",
- "from utils.extmath import make_nonnegative",
- "from utils.extmath import randomized_svd",
- "from utils.extmath import safe_sparse_dot",
- "from utils.validation import assert_all_finite",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "BaseSpectral",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_parameters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Creates a biclustering for X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n\ny : Ignored"
- },
- {
- "name": "_svd",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns first `n_components` left and right singular\nvectors u and v, discarding the first `n_discard`."
- },
- {
- "name": "_k_means",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Base class for spectral biclustering."
- },
- {
- "name": "SpectralCoclustering",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of biclusters to find."
- },
- {
- "name": "svd_method",
- "type": "Literal['randomized', 'arpack']",
- "hasDefault": true,
- "default": "'randomized'",
- "limitation": null,
- "ignored": false,
- "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', use :func:`sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', use :func:`scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases."
- },
- {
- "name": "n_svd_vecs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`."
- },
- {
- "name": "mini_batch",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to use mini-batch k-means, which is faster but may get different results."
- },
- {
- "name": "init",
- "type": null,
- "hasDefault": true,
- "default": "'k-means++'",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Spectral Co-Clustering algorithm (Dhillon, 2001).\n\nClusters rows and columns of an array `X` to solve the relaxed\nnormalized cut of the bipartite graph created from `X` as follows:\nthe edge between row vertex `i` and column vertex `j` has weight\n`X[i, j]`.\n\nThe resulting bicluster structure is block-diagonal, since each\nrow and each column belongs to exactly one bicluster.\n\nSupports sparse matrices, as long as they are nonnegative.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=3\n The number of biclusters to find.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', use\n :func:`sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', use\n :func:`scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random', or ndarray of shape (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. 
This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n The bicluster label of each row.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n The bicluster label of each column.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralCoclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_ #doctest: +SKIP\narray([0, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_ #doctest: +SKIP\narray([0, 0], dtype=int32)\n>>> clustering\nSpectralCoclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Dhillon, Inderjit S, 2001. `Co-clustering documents and words using\n bipartite spectral graph partitioning\n `__."
- },
- {
- "name": "SpectralBiclustering",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "Union[Tuple[], int]",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of row and column clusters in the checkerboard structure."
- },
- {
- "name": "method",
- "type": "Literal['bistochastic', 'scale', 'log']",
- "hasDefault": true,
- "default": "'bistochastic'",
- "limitation": null,
- "ignored": false,
- "docstring": "Method of normalizing and converting singular vectors into biclusters. May be one of 'scale', 'bistochastic', or 'log'. The authors recommend using 'log'. If the data is sparse, however, log normalization will not work, which is why the default is 'bistochastic'. .. warning:: if `method='log'`, the data must be sparse."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "6",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of singular vectors to check."
- },
- {
- "name": "n_best",
- "type": "int",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of best singular vectors to which to project the data for clustering."
- },
- {
- "name": "svd_method",
- "type": "Literal['randomized', 'arpack']",
- "hasDefault": true,
- "default": "'randomized'",
- "limitation": null,
- "ignored": false,
- "docstring": "Selects the algorithm for finding singular vectors. May be 'randomized' or 'arpack'. If 'randomized', uses :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster for large matrices. If 'arpack', uses `scipy.sparse.linalg.svds`, which is more accurate, but possibly slower in some cases."
- },
- {
- "name": "n_svd_vecs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of vectors to use in calculating the SVD. Corresponds to `ncv` when `svd_method=arpack` and `n_oversamples` when `svd_method` is 'randomized`."
- },
- {
- "name": "mini_batch",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to use mini-batch k-means, which is faster but may get different results."
- },
- {
- "name": "init",
- "type": "Literal['k-means++', 'random']",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization of k-means algorithm; defaults to 'k-means++'."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of random initializations that are tried with the k-means algorithm. If mini-batch k-means is used, the best initialization is chosen and the algorithm runs once. Otherwise, the algorithm is run for each initialization and the best solution chosen."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used for randomizing the singular value decomposition and the k-means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_parameters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit_best_piecewise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Find the ``n_best`` vectors that are best approximated by piecewise\nconstant vectors.\n\nThe piecewise vectors are found by k-means; the best is chosen\naccording to Euclidean distance."
- },
- {
- "name": "_project_and_cluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Project ``data`` to ``vectors`` and cluster the result."
- }
- ],
- "docstring": "Spectral biclustering (Kluger, 2003).\n\nPartitions rows and columns under the assumption that the data has\nan underlying checkerboard structure. For instance, if there are\ntwo row partitions and three column partitions, each row will\nbelong to three biclusters, and each column will belong to two\nbiclusters. The outer product of the corresponding row and column\nlabel vectors gives this checkerboard structure.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3\n The number of row and column clusters in the checkerboard\n structure.\n\nmethod : {'bistochastic', 'scale', 'log'}, default='bistochastic'\n Method of normalizing and converting singular vectors into\n biclusters. May be one of 'scale', 'bistochastic', or 'log'.\n The authors recommend using 'log'. If the data is sparse,\n however, log normalization will not work, which is why the\n default is 'bistochastic'.\n\n .. warning::\n if `method='log'`, the data must be sparse.\n\nn_components : int, default=6\n Number of singular vectors to check.\n\nn_best : int, default=3\n Number of best singular vectors to which to project the data\n for clustering.\n\nsvd_method : {'randomized', 'arpack'}, default='randomized'\n Selects the algorithm for finding singular vectors. May be\n 'randomized' or 'arpack'. If 'randomized', uses\n :func:`~sklearn.utils.extmath.randomized_svd`, which may be faster\n for large matrices. If 'arpack', uses\n `scipy.sparse.linalg.svds`, which is more accurate, but\n possibly slower in some cases.\n\nn_svd_vecs : int, default=None\n Number of vectors to use in calculating the SVD. 
Corresponds\n to `ncv` when `svd_method=arpack` and `n_oversamples` when\n `svd_method` is 'randomized`.\n\nmini_batch : bool, default=False\n Whether to use mini-batch k-means, which is faster but may get\n different results.\n\ninit : {'k-means++', 'random'} or ndarray of (n_clusters, n_features), default='k-means++'\n Method for initialization of k-means algorithm; defaults to\n 'k-means++'.\n\nn_init : int, default=10\n Number of random initializations that are tried with the\n k-means algorithm.\n\n If mini-batch k-means is used, the best initialization is\n chosen and the algorithm runs once. Otherwise, the algorithm\n is run for each initialization and the best solution chosen.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nrandom_state : int, RandomState instance, default=None\n Used for randomizing the singular value decomposition and the k-means\n initialization. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nAttributes\n----------\nrows_ : array-like of shape (n_row_clusters, n_rows)\n Results of the clustering. `rows[i, r]` is True if\n cluster `i` contains row `r`. Available only after calling ``fit``.\n\ncolumns_ : array-like of shape (n_column_clusters, n_columns)\n Results of the clustering, like `rows`.\n\nrow_labels_ : array-like of shape (n_rows,)\n Row partition labels.\n\ncolumn_labels_ : array-like of shape (n_cols,)\n Column partition labels.\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralBiclustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... 
[4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)\n>>> clustering.row_labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> clustering.column_labels_\narray([0, 1], dtype=int32)\n>>> clustering\nSpectralBiclustering(n_clusters=2, random_state=0)\n\nReferences\n----------\n\n* Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray\n data: coclustering genes and conditions\n `__."
- }
- ],
- "functions": [
- {
- "name": "_scale_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Normalize ``X`` by scaling rows and columns independently.\n\nReturns the normalized matrix and the row and column scaling\nfactors."
- },
- {
- "name": "_bistochastic_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Normalize rows and columns of ``X`` simultaneously so that all\nrows sum to one constant and all columns sum to a different\nconstant."
- },
- {
- "name": "_log_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Normalize ``X`` according to Kluger's log-interactions scheme."
- }
- ]
- },
- {
- "name": "sklearn.cluster._birch",
- "imports": [
- "import warnings",
- "import numbers",
- "import numpy as np",
- "from scipy import sparse",
- "from math import sqrt",
- "from metrics import pairwise_distances_argmin",
- "from metrics.pairwise import euclidean_distances",
- "from base import TransformerMixin",
- "from base import ClusterMixin",
- "from base import BaseEstimator",
- "from utils.extmath import row_norms",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _deprecate_positional_args",
- "from exceptions import ConvergenceWarning",
- "from None import AgglomerativeClustering",
- "from _config import config_context"
- ],
- "classes": [
- {
- "name": "_CFNode",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "threshold",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Threshold needed for a new subcluster to enter a CFSubcluster."
- },
- {
- "name": "branching_factor",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of CF subclusters in each node."
- },
- {
- "name": "is_leaf",
- "type": "bool",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "We need to know if the CFNode is a leaf or not, in order to retrieve the final subclusters."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of features."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "append_subcluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "update_split_subclusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Remove a subcluster from a node and update it with the\nsplit subclusters."
- },
- {
- "name": "insert_cf_subcluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Insert a new subcluster into the node."
- }
- ],
- "docstring": "Each node in a CFTree is called a CFNode.\n\nThe CFNode can have a maximum of branching_factor\nnumber of CFSubclusters.\n\nParameters\n----------\nthreshold : float\n Threshold needed for a new subcluster to enter a CFSubcluster.\n\nbranching_factor : int\n Maximum number of CF subclusters in each node.\n\nis_leaf : bool\n We need to know if the CFNode is a leaf or not, in order to\n retrieve the final subclusters.\n\nn_features : int\n The number of features.\n\nAttributes\n----------\nsubclusters_ : list\n List of subclusters for a particular CFNode.\n\nprev_leaf_ : _CFNode\n Useful only if is_leaf is True.\n\nnext_leaf_ : _CFNode\n next_leaf. Useful only if is_leaf is True.\n the final subclusters.\n\ninit_centroids_ : ndarray of shape (branching_factor + 1, n_features)\n Manipulate ``init_centroids_`` throughout rather than centroids_ since\n the centroids are just a view of the ``init_centroids_`` .\n\ninit_sq_norm_ : ndarray of shape (branching_factor + 1,)\n manipulate init_sq_norm_ throughout. similar to ``init_centroids_``.\n\ncentroids_ : ndarray of shape (branching_factor + 1, n_features)\n View of ``init_centroids_``.\n\nsquared_norm_ : ndarray of shape (branching_factor + 1,)\n View of ``init_sq_norm_``."
- },
- {
- "name": "_CFSubcluster",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "linear_sum",
- "type": "NDArray",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample. This is kept optional to allow initialization of empty subclusters."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "update",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "merge_subcluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Check if a cluster is worthy enough to be merged. If\nyes then merge."
- },
- {
- "name": "radius",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return radius of the subcluster"
- }
- ],
- "docstring": "Each subcluster in a CFNode is called a CFSubcluster.\n\nA CFSubcluster can have a CFNode has its child.\n\nParameters\n----------\nlinear_sum : ndarray of shape (n_features,), default=None\n Sample. This is kept optional to allow initialization of empty\n subclusters.\n\nAttributes\n----------\nn_samples_ : int\n Number of samples that belong to each subcluster.\n\nlinear_sum_ : ndarray\n Linear sum of all the samples in a subcluster. Prevents holding\n all sample data in memory.\n\nsquared_sum_ : float\n Sum of the squared l2 norms of all samples belonging to a subcluster.\n\ncentroid_ : ndarray of shape (branching_factor + 1, n_features)\n Centroid of the subcluster. Prevent recomputing of centroids when\n ``CFNode.centroids_`` is called.\n\nchild_ : _CFNode\n Child Node of the subcluster. Once a given _CFNode is set as the child\n of the _CFNode, it is set to ``self.child_``.\n\nsq_norm_ : ndarray of shape (branching_factor + 1,)\n Squared norm of the subcluster. Used to prevent recomputing when\n pairwise minimum distances are computed."
- },
- {
- "name": "Birch",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "threshold",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The radius of the subcluster obtained by merging a new sample and the closest subcluster should be lesser than the threshold. Otherwise a new subcluster is started. Setting this value to be very low promotes splitting and vice-versa."
- },
- {
- "name": "branching_factor",
- "type": "int",
- "hasDefault": true,
- "default": "50",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of CF subclusters in each node. If a new samples enters such that the number of subclusters exceed the branching_factor then that node is split into two nodes with the subclusters redistributed in each. The parent subcluster of that node is removed and two new subclusters are added as parents of the 2 split nodes."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of clusters after the final clustering step, which treats the subclusters from the leaves as new samples. - `None` : the final clustering step is not performed and the subclusters are returned as they are. - :mod:`sklearn.cluster` Estimator : If a model is provided, the model is fit treating the subclusters as new samples and the initial data is mapped to the label of the closest subcluster. - `int` : the model fit is :class:`AgglomerativeClustering` with `n_clusters` set to be equal to the int."
- },
- {
- "name": "compute_labels",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to compute labels for each fit."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to make a copy of the given data. If set to False, the initial data will be overwritten."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Build a CF Tree for the input data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator."
- },
- {
- "name": "_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_leaves",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Retrieve the leaves of the CF Node.\n\nReturns\n-------\nleaves : list of shape (n_leaves,)\n List of the leaf nodes."
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data. If X is not provided, only the global clustering step is done."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Online learning. Prevents rebuilding of CFTree from scratch.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), default=None\n Input data. If X is not provided, only the global clustering\n step is done.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself\n Fitted estimator."
- },
- {
- "name": "_check_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict data using the ``centroids_`` of subclusters.\n\nAvoid computation of the row norms of X.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nlabels : ndarray of shape(n_samples,)\n Labelled data."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform X into subcluster centroids dimension.\n\nEach dimension represents the distance from the sample point to each\ncluster centroid.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input data.\n\nReturns\n-------\nX_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)\n Transformed data."
- },
- {
- "name": "_global_clustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Global clustering for the subclusters obtained after fitting"
- }
- ],
- "docstring": "Implements the Birch clustering algorithm.\n\nIt is a memory-efficient, online-learning algorithm provided as an\nalternative to :class:`MiniBatchKMeans`. It constructs a tree\ndata structure with the cluster centroids being read off the leaf.\nThese can be either the final cluster centroids or can be provided as input\nto another clustering algorithm such as :class:`AgglomerativeClustering`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.16\n\nParameters\n----------\nthreshold : float, default=0.5\n The radius of the subcluster obtained by merging a new sample and the\n closest subcluster should be lesser than the threshold. Otherwise a new\n subcluster is started. Setting this value to be very low promotes\n splitting and vice-versa.\n\nbranching_factor : int, default=50\n Maximum number of CF subclusters in each node. If a new samples enters\n such that the number of subclusters exceed the branching_factor then\n that node is split into two nodes with the subclusters redistributed\n in each. The parent subcluster of that node is removed and two new\n subclusters are added as parents of the 2 split nodes.\n\nn_clusters : int, instance of sklearn.cluster model, default=3\n Number of clusters after the final clustering step, which treats the\n subclusters from the leaves as new samples.\n\n - `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n - :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n - `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int.\n\ncompute_labels : bool, default=True\n Whether or not to compute labels for each fit.\n\ncopy : bool, default=True\n Whether or not to make a copy of the given data. 
If set to False,\n the initial data will be overwritten.\n\nAttributes\n----------\nroot_ : _CFNode\n Root of the CFTree.\n\ndummy_leaf_ : _CFNode\n Start pointer to all the leaves.\n\nsubcluster_centers_ : ndarray\n Centroids of all subclusters read directly from the leaves.\n\nsubcluster_labels_ : ndarray\n Labels assigned to the centroids of the subclusters after\n they are clustered globally.\n\nlabels_ : ndarray of shape (n_samples,)\n Array of labels assigned to the input data.\n if partial_fit is used instead of fit, they are assigned to the\n last batch of data.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative implementation that does incremental updates\n of the centers' positions using mini-batches.\n\nNotes\n-----\nThe tree data structure consists of nodes with each node consisting of\na number of subclusters. The maximum number of subclusters in a node\nis determined by the branching factor. Each subcluster maintains a\nlinear sum, squared sum and the number of samples in that subcluster.\nIn addition, each subcluster can also have a node as its child, if the\nsubcluster is not a member of a leaf node.\n\nFor a new point entering the root, it is merged with the subcluster closest\nto it and the linear sum, squared sum and the number of samples of that\nsubcluster are updated. This is done recursively till the properties of\nthe leaf node are updated.\n\nReferences\n----------\n* Tian Zhang, Raghu Ramakrishnan, Maron Livny\n BIRCH: An efficient data clustering method for large databases.\n https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf\n\n* Roberto Perdisci\n JBirch - Java implementation of BIRCH clustering algorithm\n https://code.google.com/archive/p/jbirch\n\nExamples\n--------\n>>> from sklearn.cluster import Birch\n>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]\n>>> brc = Birch(n_clusters=None)\n>>> brc.fit(X)\nBirch(n_clusters=None)\n>>> brc.predict(X)\narray([0, 0, 0, 1, 1, 1])"
- }
- ],
- "functions": [
- {
- "name": "_iterate_sparse_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "This little hack returns a densified row when iterating over a sparse\nmatrix, instead of constructing a sparse matrix for every row that is\nexpensive."
- },
- {
- "name": "_split_node",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters."
- }
- ]
- },
- {
- "name": "sklearn.cluster._dbscan",
- "imports": [
- "import numpy as np",
- "import warnings",
- "from scipy import sparse",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from utils.validation import _check_sample_weight",
- "from utils.validation import _deprecate_positional_args",
- "from neighbors import NearestNeighbors",
- "from _dbscan_inner import dbscan_inner"
- ],
- "classes": [
- {
- "name": "DBSCAN",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."
- },
- {
- "name": "min_samples",
- "type": "int",
- "hasDefault": true,
- "default": "5",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself."
- },
- {
- "name": "metric",
- "type": "str",
- "hasDefault": true,
- "default": "'euclidean'",
- "limitation": null,
- "ignored": false,
- "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. X may be a :term:`Glossary `, in which case only \"nonzero\" elements may be considered neighbors for DBSCAN. .. versionadded:: 0.17 metric *precomputed* to accept precomputed sparse matrix."
- },
- {
- "name": "metric_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19"
- },
- {
- "name": "algorithm",
- "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details."
- },
- {
- "name": "leaf_size",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem."
- },
- {
- "name": "p",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The power of the Minkowski metric to be used to calculate distance between points. If None, then ``p=2`` (equivalent to the Euclidean distance)."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Perform DBSCAN clustering from features, or distance matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself"
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or distances between instances if ``metric='precomputed'``. If a sparse matrix is provided, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with a negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform DBSCAN clustering from features or distance matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or (n_samples, n_samples)\n Training instances to cluster, or distances between instances if\n ``metric='precomputed'``. If a sparse matrix is provided, it will\n be converted into a sparse ``csr_matrix``.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with a\n negative weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels. Noisy samples are given the label -1."
- }
- ],
- "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nDBSCAN - Density-Based Spatial Clustering of Applications with Noise.\nFinds core samples of high density and expands clusters from them.\nGood for data which contains clusters of similar density.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : string, or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square. X may be a :term:`Glossary `, in which\n case only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n .. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. 
The optimal value depends\n on the nature of the problem.\n\np : float, default=None\n The power of the Minkowski metric to be used to calculate distance\n between points. If None, then ``p=2`` (equivalent to the Euclidean\n distance).\n\nn_jobs : int, default=None\n The number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\ncore_sample_indices_ : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\ncomponents_ : ndarray of shape (n_core_samples, n_features)\n Copy of each core sample found by training.\n\nlabels_ : ndarray of shape (n_samples)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples are given the label -1.\n\nExamples\n--------\n>>> from sklearn.cluster import DBSCAN\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 2], [2, 3],\n... [8, 7], [8, 8], [25, 80]])\n>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)\n>>> clustering.labels_\narray([ 0, 0, 0, 1, 1, -1])\n>>> clustering\nDBSCAN(eps=3, min_samples=2)\n\nSee Also\n--------\nOPTICS : A similar clustering at multiple values of eps. Our implementation\n is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:class:`cluster.OPTICS` provides a similar clustering with lower memory\nusage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19."
- }
- ],
- "functions": [
- {
- "name": "dbscan",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A feature array, or array of distances between samples if ``metric='precomputed'``."
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."
- },
- {
- "name": "min_samples",
- "type": "int",
- "hasDefault": true,
- "default": "5",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself."
- },
- {
- "name": "metric",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'minkowski'",
- "limitation": null,
- "ignored": false,
- "docstring": "The metric to use when calculating distance between instances in a feature array. If metric is a string or callable, it must be one of the options allowed by :func:`sklearn.metrics.pairwise_distances` for its metric parameter. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph `, in which case only \"nonzero\" elements may be considered neighbors."
- },
- {
- "name": "metric_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional keyword arguments for the metric function. .. versionadded:: 0.19"
- },
- {
- "name": "algorithm",
- "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors. See NearestNeighbors module documentation for details."
- },
- {
- "name": "leaf_size",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Leaf size passed to BallTree or cKDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem."
- },
- {
- "name": "p",
- "type": "float",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The power of the Minkowski metric to be used to calculate distance between points."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Weight of each sample, such that a sample with a weight of at least ``min_samples`` is by itself a core sample; a sample with negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. If precomputed distance are used, parallel execution is not available and thus n_jobs will have no effect."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform DBSCAN clustering from vector array or distance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or (n_samples, n_samples)\n A feature array, or array of distances between samples if\n ``metric='precomputed'``.\n\neps : float, default=0.5\n The maximum distance between two samples for one to be considered\n as in the neighborhood of the other. This is not a maximum bound\n on the distances of points within a cluster. This is the most\n important DBSCAN parameter to choose appropriately for your data set\n and distance function.\n\nmin_samples : int, default=5\n The number of samples (or total weight) in a neighborhood for a point\n to be considered as a core point. This includes the point itself.\n\nmetric : str or callable, default='minkowski'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string or callable, it must be one of\n the options allowed by :func:`sklearn.metrics.pairwise_distances` for\n its metric parameter.\n If metric is \"precomputed\", X is assumed to be a distance matrix and\n must be square during fit.\n X may be a :term:`sparse graph `,\n in which case only \"nonzero\" elements may be considered neighbors.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\n .. versionadded:: 0.19\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n The algorithm to be used by the NearestNeighbors module\n to compute pointwise distances and find nearest neighbors.\n See NearestNeighbors module documentation for details.\n\nleaf_size : int, default=30\n Leaf size passed to BallTree or cKDTree. This can affect the speed\n of the construction and query, as well as the memory required\n to store the tree. 
The optimal value depends\n on the nature of the problem.\n\np : float, default=2\n The power of the Minkowski metric to be used to calculate distance\n between points.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Weight of each sample, such that a sample with a weight of at least\n ``min_samples`` is by itself a core sample; a sample with negative\n weight may inhibit its eps-neighbor from being core.\n Note that weights are absolute, and default to 1.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search. ``None`` means\n 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means\n using all processors. See :term:`Glossary ` for more details.\n If precomputed distance are used, parallel execution is not available\n and thus n_jobs will have no effect.\n\nReturns\n-------\ncore_samples : ndarray of shape (n_core_samples,)\n Indices of core samples.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point. Noisy samples are given the label -1.\n\nSee Also\n--------\nDBSCAN : An estimator interface for this clustering algorithm.\nOPTICS : A similar estimator interface clustering at multiple values of\n eps. Our implementation is optimized for memory usage.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_dbscan.py\n`.\n\nThis implementation bulk-computes all neighborhood queries, which increases\nthe memory complexity to O(n.d) where d is the average number of neighbors,\nwhile original DBSCAN had memory complexity O(n). 
It may attract a higher\nmemory complexity when querying these nearest neighborhoods, depending\non the ``algorithm``.\n\nOne way to avoid the query complexity is to pre-compute sparse\nneighborhoods in chunks using\n:func:`NearestNeighbors.radius_neighbors_graph\n` with\n``mode='distance'``, then using ``metric='precomputed'`` here.\n\nAnother way to reduce memory and computation time is to remove\n(near-)duplicate points and use ``sample_weight`` instead.\n\n:func:`cluster.optics ` provides a similar\nclustering with lower memory usage.\n\nReferences\n----------\nEster, M., H. P. Kriegel, J. Sander, and X. Xu, \"A Density-Based\nAlgorithm for Discovering Clusters in Large Spatial Databases with Noise\".\nIn: Proceedings of the 2nd International Conference on Knowledge Discovery\nand Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996\n\nSchubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).\nDBSCAN revisited, revisited: why and how you should (still) use DBSCAN.\nACM Transactions on Database Systems (TODS), 42(3), 19."
- }
- ]
- },
- {
- "name": "sklearn.cluster._feature_agglomeration",
- "imports": [
- "import numpy as np",
- "from base import TransformerMixin",
- "from utils.validation import check_is_fitted",
- "from scipy.sparse import issparse"
- ],
- "classes": [
- {
- "name": "AgglomerationTransform",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A M by N array of M observations in N dimensions or a length M array of M one-dimensional observations."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform a new matrix using the built clustering\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features) or (n_samples,)\n A M by N array of M observations in N dimensions or a length\n M array of M one-dimensional observations.\n\nReturns\n-------\nY : ndarray of shape (n_samples, n_clusters) or (n_clusters,)\n The pooled values for each feature cluster."
- },
- {
- "name": "inverse_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "Xred",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The values to be assigned to each cluster of samples"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Inverse the transformation.\nReturn a vector of size nb_features with the values of Xred assigned\nto each group of features\n\nParameters\n----------\nXred : array-like of shape (n_samples, n_clusters) or (n_clusters,)\n The values to be assigned to each cluster of samples\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features) or (n_features,)\n A vector of size n_samples with the values of Xred assigned to\n each of the cluster of samples."
- }
- ],
- "docstring": "A class for feature agglomeration via the transform interface"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.cluster._kmeans",
- "imports": [
- "import warnings",
- "import numpy as np",
- "import scipy.sparse as sp",
- "from threadpoolctl import threadpool_limits",
- "from threadpoolctl import threadpool_info",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from base import TransformerMixin",
- "from metrics.pairwise import euclidean_distances",
- "from utils.extmath import row_norms",
- "from utils.extmath import stable_cumsum",
- "from utils.sparsefuncs_fast import assign_rows_csr",
- "from utils.sparsefuncs import mean_variance_axis",
- "from utils.validation import _deprecate_positional_args",
- "from utils import check_array",
- "from utils import gen_batches",
- "from utils import check_random_state",
- "from utils import deprecated",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _check_sample_weight",
- "from utils._openmp_helpers import _openmp_effective_n_threads",
- "from exceptions import ConvergenceWarning",
- "from _k_means_fast import CHUNK_SIZE",
- "from _k_means_fast import _inertia_dense",
- "from _k_means_fast import _inertia_sparse",
- "from _k_means_fast import _mini_batch_update_csr",
- "from _k_means_lloyd import lloyd_iter_chunked_dense",
- "from _k_means_lloyd import lloyd_iter_chunked_sparse",
- "from _k_means_elkan import init_bounds_dense",
- "from _k_means_elkan import init_bounds_sparse",
- "from _k_means_elkan import elkan_iter_chunked_dense",
- "from _k_means_elkan import elkan_iter_chunked_sparse"
- ],
- "classes": [
- {
- "name": "KMeans",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "8",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to form as well as the number of centroids to generate."
- },
- {
- "name": "init",
- "type": "Literal['k-means++', 'random']",
- "hasDefault": true,
- "default": "'k-means++'",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations of the k-means algorithm for a single run."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence."
- },
- {
- "name": "precompute_distances",
- "type": "Literal['auto', True, False]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Precompute distances (faster but takes more memory). 'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances. False : never precompute distances. .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.22 and will be removed in 1.0 (renaming of 0.25). It has no effect."
- },
- {
- "name": "verbose",
- "type": "int",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "copy_x",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)."
- },
- {
- "name": "algorithm",
- "type": "Literal[\"auto\", \"full\", \"elkan\"]",
- "hasDefault": true,
- "default": "\"auto\"",
- "limitation": null,
- "ignored": false,
- "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic. .. versionchanged:: 0.18 Added Elkan algorithm"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_validate_center_shape",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Check if centers is compatible with X and n_clusters."
- },
- {
- "name": "_check_test_data",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_mkl_vcomp",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Warns when vcomp and mkl are both present"
- },
- {
- "name": "_init_centroids",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input samples."
- },
- {
- "name": "x_squared_norms",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Squared euclidean norm of each data point. Pass it if you have it at hands already to avoid it being recomputed here."
- },
- {
- "name": "init",
- "type": "Literal['k-means++', 'random']",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization."
- },
- {
- "name": "random_state",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization. See :term:`Glossary `."
- },
- {
- "name": "init_size",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy)."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute the initial centroids.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point. Pass it if you have it\n at hands already to avoid it being recomputed here.\n\ninit : {'k-means++', 'random'}, callable or ndarray of shape (n_clusters, n_features)\n Method for initialization.\n\nrandom_state : RandomState instance\n Determines random number generation for centroid initialization.\n See :term:`Glossary `.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)"
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous. If a sparse matrix is passed, a copy will be made if it's not in CSR format."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Compute k-means clustering.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory\n copy if the given data is not C-contiguous.\n If a sparse matrix is passed, a copy will be made if it's not in\n CSR format.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself\n Fitted estimator."
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to transform."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute cluster centers and predict cluster index for each sample.\n\nConvenience method; equivalent to calling fit(X) followed by\npredict(X).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to."
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to transform."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute clustering and transform X to cluster-distance space.\n\nEquivalent to fit(X).transform(X), but more efficiently implemented.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to transform."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform X to a cluster-distance space.\n\nIn the new space, each dimension is the distance to the cluster\ncenters. Note that even if X is sparse, the array returned by\n`transform` will typically be dense.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to transform.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_clusters)\n X transformed in the new space."
- },
- {
- "name": "_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Guts of transform method; no input validation."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to predict."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to."
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Opposite of the value of X on the K-means objective.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\nReturns\n-------\nscore : float\n Opposite of the value of X on the K-means objective."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm for a\n single run.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nprecompute_distances : {'auto', True, False}, default='auto'\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances.\n\n False : never precompute distances.\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.22 and will be\n removed in 1.0 (renaming of 0.25). 
It has no effect.\n\nverbose : int, default=0\n Verbosity mode.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\n .. versionchanged:: 0.18\n Added Elkan algorithm\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers. 
If the algorithm stops before fully\n converging (see ``tol`` and ``max_iter``), these will not be\n consistent with ``labels_``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\ninertia_ : float\n Sum of squared distances of samples to their closest cluster center.\n\nn_iter_ : int\n Number of iterations run.\n\nSee Also\n--------\nMiniBatchKMeans : Alternative online implementation that does incremental\n updates of the centers positions using mini-batches.\n For large scale learning (say n_samples > 10k) MiniBatchKMeans is\n probably much faster than the default batch implementation.\n\nNotes\n-----\nThe k-means problem is solved using either Lloyd's or Elkan's algorithm.\n\nThe average complexity is given by O(k n T), were n is the number of\nsamples and T is the number of iteration.\n\nThe worst case complexity is given by O(n^(k+2/p)) with\nn = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,\n'How slow is the k-means method?' SoCG2006)\n\nIn practice, the k-means algorithm is very fast (one of the fastest\nclustering algorithms available), but it falls in local minima. That's why\nit can be useful to restart it several times.\n\nIf the algorithm stops before fully converging (because of ``tol`` or\n``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,\ni.e. the ``cluster_centers_`` will not be the means of the points in each\ncluster. Also, the estimator will reassign ``labels_`` after the last\niteration to make ``labels_`` consistent with ``predict`` on the training\nset.\n\nExamples\n--------\n\n>>> from sklearn.cluster import KMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [10, 2], [10, 4], [10, 0]])\n>>> kmeans = KMeans(n_clusters=2, random_state=0).fit(X)\n>>> kmeans.labels_\narray([1, 1, 1, 0, 0, 0], dtype=int32)\n>>> kmeans.predict([[0, 0], [12, 3]])\narray([1, 0], dtype=int32)\n>>> kmeans.cluster_centers_\narray([[10., 2.],\n [ 1., 2.]])"
- },
- {
- "name": "MiniBatchKMeans",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "8",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to form as well as the number of centroids to generate."
- },
- {
- "name": "init",
- "type": "Literal['k-means++', 'random']",
- "hasDefault": true,
- "default": "'k-means++'",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations over the complete dataset before stopping independently of any early stopping criterion heuristics."
- },
- {
- "name": "batch_size",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Size of the mini batches."
- },
- {
- "name": "verbose",
- "type": "int",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode."
- },
- {
- "name": "compute_labels",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Compute label assignment and inertia for the complete dataset once the minibatch optimization has converged in fit."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization and random reassignment. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Control early stopping based on the relative center changes as measured by a smoothed, variance-normalized of the mean center squared position changes. This early stopping heuristics is closer to the one used for the batch variant of the algorithms but induces a slight computational and memory overhead over the inertia heuristic. To disable convergence detection based on normalized center change, set tol to 0.0 (default)."
- },
- {
- "name": "max_no_improvement",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Control early stopping based on the consecutive number of mini batches that does not yield an improvement on the smoothed inertia. To disable convergence detection based on inertia, set max_no_improvement to None."
- },
- {
- "name": "init_size",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy): the only algorithm is initialized by running a batch KMeans on a random subset of the data. This needs to be larger than n_clusters. If `None`, `init_size= 3 * batch_size`."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of random initializations that are tried. In contrast to KMeans, the algorithm is only run once, using the best of the ``n_init`` initializations as measured by inertia."
- },
- {
- "name": "reassignment_ratio",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more easily reassigned, which means that the model will take longer to converge, but should converge in a better clustering."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "counts_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "init_size_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "random_state_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None). .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Compute the centroids on X by chunking it into mini-batches.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training instances to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\n .. versionadded:: 0.20\n\nReturns\n-------\nself"
- },
- {
- "name": "_labels_inertia_minibatch",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute labels and inertia using mini batches.\n\nThis is slightly slower than doing everything at once but prevents\nmemory errors / segfaults.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\ninertia : float\n Sum of squared distances of points to nearest cluster."
- },
- {
- "name": "partial_fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Coordinates of the data points to cluster. It must be noted that X will be copied if it is not C-contiguous."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Update k means estimate on a single mini-batch X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Coordinates of the data points to cluster. It must be noted that\n X will be copied if it is not C-contiguous.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nself"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to predict."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight (default: None)."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the closest cluster each sample in X belongs to.\n\nIn the vector quantization literature, `cluster_centers_` is called\nthe code book and each value returned by `predict` is the index of\nthe closest code in the code book.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight (default: None).\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Mini-Batch K-Means clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nn_clusters : int, default=8\n The number of clusters to form as well as the number of\n centroids to generate.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nmax_iter : int, default=100\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n\nbatch_size : int, default=100\n Size of the mini batches.\n\nverbose : int, default=0\n Verbosity mode.\n\ncompute_labels : bool, default=True\n Compute label assignment and inertia for the complete dataset\n once the minibatch optimization has converged in fit.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and\n random reassignment. Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ntol : float, default=0.0\n Control early stopping based on the relative center changes as\n measured by a smoothed, variance-normalized of the mean center\n squared position changes. 
This early stopping heuristics is\n closer to the one used for the batch variant of the algorithms\n but induces a slight computational and memory overhead over the\n inertia heuristic.\n\n To disable convergence detection based on normalized center\n change, set tol to 0.0 (default).\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini\n batches that does not yield an improvement on the smoothed inertia.\n\n To disable convergence detection based on inertia, set\n max_no_improvement to None.\n\ninit_size : int, default=None\n Number of samples to randomly sample for speeding up the\n initialization (sometimes at the expense of accuracy): the\n only algorithm is initialized by running a batch KMeans on a\n random subset of the data. This needs to be larger than n_clusters.\n\n If `None`, `init_size= 3 * batch_size`.\n\nn_init : int, default=3\n Number of random initializations that are tried.\n In contrast to KMeans, the algorithm is only run once, using the\n best of the ``n_init`` initializations as measured by inertia.\n\nreassignment_ratio : float, default=0.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more easily reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nAttributes\n----------\n\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : int\n Labels of each point (if compute_labels is set to True).\n\ninertia_ : float\n The value of the inertia criterion associated with the chosen\n partition (if compute_labels is set to True). The inertia is\n defined as the sum of square distances of samples to their nearest\n neighbor.\n\nn_iter_ : int\n Number of batches processed.\n\ncounts_ : ndarray of shape (n_clusters,)\n Weigth sum of each cluster.\n\n .. 
deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\ninit_size_ : int\n The effective number of samples used for the initialization.\n\n .. deprecated:: 0.24\n This attribute is deprecated in 0.24 and will be removed in\n 1.1 (renaming of 0.26).\n\nSee Also\n--------\nKMeans : The classic implementation of the clustering method based on the\n Lloyd's algorithm. It consumes the whole set of input data at each\n iteration.\n\nNotes\n-----\nSee https://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf\n\nExamples\n--------\n>>> from sklearn.cluster import MiniBatchKMeans\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [4, 2], [4, 0], [4, 4],\n... [4, 5], [0, 1], [2, 2],\n... [3, 2], [5, 5], [1, -1]])\n>>> # manually fit on batches\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6)\n>>> kmeans = kmeans.partial_fit(X[0:6,:])\n>>> kmeans = kmeans.partial_fit(X[6:12,:])\n>>> kmeans.cluster_centers_\narray([[2. , 1. ],\n [3.5, 4.5]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([0, 1], dtype=int32)\n>>> # fit on the whole data\n>>> kmeans = MiniBatchKMeans(n_clusters=2,\n... random_state=0,\n... batch_size=6,\n... max_iter=10).fit(X)\n>>> kmeans.cluster_centers_\narray([[3.95918367, 2.40816327],\n [1.12195122, 1.3902439 ]])\n>>> kmeans.predict([[0, 0], [4, 4]])\narray([1, 0], dtype=int32)"
- }
- ],
- "functions": [
- {
- "name": "_kmeans_plusplus",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data to pick seeds for."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of seeds to choose."
- },
- {
- "name": "x_squared_norms",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Squared Euclidean norm of each data point."
- },
- {
- "name": "random_state",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The generator used to initialize the centers. See :term:`Glossary `."
- },
- {
- "name": "n_local_trials",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k)); this is the default."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computational component for initialization of n_clusters by\nk-means++. Prior validation of data is assumed.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds for.\n\nn_clusters : int\n The number of seeds to choose.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared Euclidean norm of each data point.\n\nrandom_state : RandomState instance\n The generator used to initialize the centers.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)); this is the default.\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center."
- },
- {
- "name": "_tolerance",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return a tolerance which is independent of the dataset."
- },
- {
- "name": "k_means",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The observations to cluster. It must be noted that the data will be converted to C ordering, which will cause a memory copy if the given data is not C-contiguous."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters to form as well as the number of centroids to generate."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X. If None, all observations are assigned equal weight."
- },
- {
- "name": "init",
- "type": "Literal['k-means++', 'random']",
- "hasDefault": true,
- "default": "'k-means++'",
- "limitation": null,
- "ignored": false,
- "docstring": "Method for initialization: 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids. If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization."
- },
- {
- "name": "precompute_distances",
- "type": "Literal['auto', True, False]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Precompute distances (faster but takes more memory). 'auto' : do not precompute distances if n_samples * n_clusters > 12 million. This corresponds to about 100MB overhead per job using double precision. True : always precompute distances False : never precompute distances .. deprecated:: 0.23 'precompute_distances' was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25). It has no effect."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations of the k-means algorithm to run."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "copy_x",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified. If False, the original data is modified, and put back before the function returns, but small numerical differences may be introduced by subtracting and then adding the data mean. Note that if the original data is not C-contiguous, a copy will be made even if copy_x is False. If the original data is sparse, but not in CSR format, a copy will be made even if copy_x is False."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center. ``None`` or ``-1`` means using all processors. .. deprecated:: 0.23 ``n_jobs`` was deprecated in version 0.23 and will be removed in 1.0 (renaming of 0.25)."
- },
- {
- "name": "algorithm",
- "type": "Literal[\"auto\", \"full\", \"elkan\"]",
- "hasDefault": true,
- "default": "\"auto\"",
- "limitation": null,
- "ignored": false,
- "docstring": "K-means algorithm to use. The classical EM-style algorithm is \"full\". The \"elkan\" variation is more efficient on data with well-defined clusters, by using the triangle inequality. However it's more memory intensive due to the allocation of an extra array of shape (n_samples, n_clusters). For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it might change in the future for a better heuristic."
- },
- {
- "name": "return_n_iter",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to return the number of iterations."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "K-means clustering algorithm.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. It must be noted that the data\n will be converted to C ordering, which will cause a memory copy\n if the given data is not C-contiguous.\n\nn_clusters : int\n The number of clusters to form as well as the number of\n centroids to generate.\n\nsample_weight : array-like of shape (n_samples,), default=None\n The weights for each observation in X. If None, all observations\n are assigned equal weight.\n\ninit : {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'\n Method for initialization:\n\n 'k-means++' : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n\n 'random': choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n\n If an array is passed, it should be of shape (n_clusters, n_features)\n and gives the initial centers.\n\n If a callable is passed, it should take arguments X, n_clusters and a\n random state and return an initialization.\n\nprecompute_distances : {'auto', True, False}\n Precompute distances (faster but takes more memory).\n\n 'auto' : do not precompute distances if n_samples * n_clusters > 12\n million. This corresponds to about 100MB overhead per job using\n double precision.\n\n True : always precompute distances\n\n False : never precompute distances\n\n .. deprecated:: 0.23\n 'precompute_distances' was deprecated in version 0.23 and will be\n removed in 1.0 (renaming of 0.25). It has no effect.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\ncopy_x : bool, default=True\n When pre-computing distances it is more numerically accurate to center\n the data first. If copy_x is True (default), then the original data is\n not modified. If False, the original data is modified, and put back\n before the function returns, but small numerical differences may be\n introduced by subtracting and then adding the data mean. Note that if\n the original data is not C-contiguous, a copy will be made even if\n copy_x is False. If the original data is sparse, but not in CSR format,\n a copy will be made even if copy_x is False.\n\nn_jobs : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\n ``None`` or ``-1`` means using all processors.\n\n .. deprecated:: 0.23\n ``n_jobs`` was deprecated in version 0.23 and will be removed in\n 1.0 (renaming of 0.25).\n\nalgorithm : {\"auto\", \"full\", \"elkan\"}, default=\"auto\"\n K-means algorithm to use. The classical EM-style algorithm is \"full\".\n The \"elkan\" variation is more efficient on data with well-defined\n clusters, by using the triangle inequality. 
However it's more memory\n intensive due to the allocation of an extra array of shape\n (n_samples, n_clusters).\n\n For now \"auto\" (kept for backward compatibiliy) chooses \"elkan\" but it\n might change in the future for a better heuristic.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nbest_n_iter : int\n Number of iterations corresponding to the best results.\n Returned only if `return_n_iter` is set to True."
- },
- {
- "name": "_kmeans_single_elkan",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The observations to cluster. If sparse matrix, must be in CSR format."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X."
- },
- {
- "name": "centers_init",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The initial centers."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations of the k-means algorithm to run."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode."
- },
- {
- "name": "x_squared_norms",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Precomputed x_squared_norms."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead."
- },
- {
- "name": "n_threads",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "A single run of k-means elkan, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode.\n\nx_squared_norms : array-like, default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run."
- },
- {
- "name": "_kmeans_single_lloyd",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The observations to cluster. If sparse matrix, must be in CSR format."
- },
- {
- "name": "sample_weight",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X."
- },
- {
- "name": "centers_init",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The initial centers."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations of the k-means algorithm to run."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode"
- },
- {
- "name": "x_squared_norms",
- "type": "NDArray",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Precomputed x_squared_norms."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "Relative tolerance with regards to Frobenius norm of the difference in the cluster centers of two consecutive iterations to declare convergence. It's not advised to set `tol=0` since convergence might never be declared due to rounding errors. Use a very small number instead."
- },
- {
- "name": "n_threads",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "A single run of k-means lloyd, assumes preparation completed prior.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The observations to cluster. If sparse matrix, must be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\ncenters_init : ndarray of shape (n_clusters, n_features)\n The initial centers.\n\nmax_iter : int, default=300\n Maximum number of iterations of the k-means algorithm to run.\n\nverbose : bool, default=False\n Verbosity mode\n\nx_squared_norms : ndarray of shape (n_samples,), default=None\n Precomputed x_squared_norms.\n\ntol : float, default=1e-4\n Relative tolerance with regards to Frobenius norm of the difference\n in the cluster centers of two consecutive iterations to declare\n convergence.\n It's not advised to set `tol=0` since convergence might never be\n declared due to rounding errors. Use a very small number instead.\n\nn_threads : int, default=1\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\ncentroid : ndarray of shape (n_clusters, n_features)\n Centroids found at the last iteration of k-means.\n\nlabel : ndarray of shape (n_samples,)\n label[i] is the code or index of the centroid the\n i'th observation is closest to.\n\ninertia : float\n The final value of the inertia criterion (sum of squared distances to\n the closest centroid for all observations in the training set).\n\nn_iter : int\n Number of iterations run."
- },
- {
- "name": "_labels_inertia",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The input samples to assign to the labels. If sparse matrix, must be in CSR format."
- },
- {
- "name": "sample_weight",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X."
- },
- {
- "name": "x_squared_norms",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Precomputed squared euclidean norm of each data point, to speed up computations."
- },
- {
- "name": "centers",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The cluster centers."
- },
- {
- "name": "n_threads",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of OpenMP threads to use for the computation. Parallelism is sample-wise on the main cython loop which assigns each sample to its closest center."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "E step of the K-means EM algorithm.\n\nCompute the labels and the inertia of the given samples and centers.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples, n_features)\n The input samples to assign to the labels. If sparse matrix, must\n be in CSR format.\n\nsample_weight : ndarray of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Precomputed squared euclidean norm of each data point, to speed up\n computations.\n\ncenters : ndarray of shape (n_clusters, n_features)\n The cluster centers.\n\nn_threads : int, default=None\n The number of OpenMP threads to use for the computation. Parallelism is\n sample-wise on the main cython loop which assigns each sample to its\n closest center.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The resulting assignment.\n\ninertia : float\n Sum of squared distances of samples to their closest cluster center."
- },
- {
- "name": "_mini_batch_step",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The original data array."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The weights for each observation in X."
- },
- {
- "name": "x_squared_norms",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Squared euclidean norm of each data point."
- },
- {
- "name": "centers",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The cluster centers. This array is MODIFIED IN PLACE"
- },
- {
- "name": "old_center_buffer",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Copy of old centers for monitoring convergence."
- },
- {
- "name": "compute_squared_diff",
- "type": "bool",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If set to False, the squared diff computation is skipped."
- },
- {
- "name": "distances",
- "type": "NDArray",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If not None, should be a pre-allocated array that will be used to store the distances of each sample to its closest center. May not be None when random_reassign is True."
- },
- {
- "name": "random_reassign",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, centers with very low counts are randomly reassigned to observations."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization and to pick new clusters amongst observations with uniform probability. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "reassignment_ratio",
- "type": "float",
- "hasDefault": true,
- "default": ".",
- "limitation": null,
- "ignored": false,
- "docstring": "Control the fraction of the maximum number of counts for a center to be reassigned. A higher value means that low count centers are more likely to be reassigned, which means that the model will take longer to converge, but should converge in a better clustering."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the verbosity."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Incremental update of the centers for the Minibatch K-Means algorithm.\n\nParameters\n----------\n\nX : ndarray of shape (n_samples, n_features)\n The original data array.\n\nsample_weight : array-like of shape (n_samples,)\n The weights for each observation in X.\n\nx_squared_norms : ndarray of shape (n_samples,)\n Squared euclidean norm of each data point.\n\ncenters : ndarray of shape (k, n_features)\n The cluster centers. This array is MODIFIED IN PLACE\n\nold_center_buffer : int\n Copy of old centers for monitoring convergence.\n\ncompute_squared_diff : bool\n If set to False, the squared diff computation is skipped.\n\ndistances : ndarray of shape (n_samples,), dtype=float, default=None\n If not None, should be a pre-allocated array that will be used to store\n the distances of each sample to its closest center.\n May not be None when random_reassign is True.\n\nrandom_reassign : bool, default=False\n If True, centers with very low counts are randomly reassigned\n to observations.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for centroid initialization and to\n pick new clusters amongst observations with uniform probability. Use\n an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nreassignment_ratio : float, default=.01\n Control the fraction of the maximum number of counts for a\n center to be reassigned. A higher value means that low count\n centers are more likely to be reassigned, which means that the\n model will take longer to converge, but should converge in a\n better clustering.\n\nverbose : bool, default=False\n Controls the verbosity.\n\nReturns\n-------\ninertia : float\n Sum of squared distances of samples to their closest cluster center.\n\nsquared_diff : ndarray of shape (n_clusters,)\n Squared distances between previous and updated cluster centers."
- },
- {
- "name": "_mini_batch_convergence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Helper function to encapsulate the early stopping logic."
- },
- {
- "name": "kmeans_plusplus",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data to pick seeds from."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of centroids to initialize"
- },
- {
- "name": "x_squared_norms",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Squared Euclidean norm of each data point."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for centroid initialization. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "n_local_trials",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of seeding trials for each center (except the first), of which the one reducing inertia the most is greedily chosen. Set to None to make the number of trials depend logarithmically on the number of seeds (2+log(k))."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Init n_clusters seeds according to k-means++\n\n.. versionadded:: 0.24\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data to pick seeds from.\n\nn_clusters : int\n The number of centroids to initialize\n\nx_squared_norms : array-like of shape (n_samples,), default=None\n Squared Euclidean norm of each data point.\n\nrandom_state : int or RandomState instance, default=None\n Determines random number generation for centroid initialization. Pass\n an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_local_trials : int, default=None\n The number of seeding trials for each center (except the first),\n of which the one reducing inertia the most is greedily chosen.\n Set to None to make the number of trials depend logarithmically\n on the number of seeds (2+log(k)).\n\nReturns\n-------\ncenters : ndarray of shape (n_clusters, n_features)\n The inital centers for k-means.\n\nindices : ndarray of shape (n_clusters,)\n The index location of the chosen centers in the data array X. For a\n given index and center, X[index] = center.\n\nNotes\n-----\nSelects initial cluster centers for k-mean clustering in a smart way\nto speed up convergence. see: Arthur, D. and Vassilvitskii, S.\n\"k-means++: the advantages of careful seeding\". ACM-SIAM symposium\non Discrete algorithms. 2007\n\nExamples\n--------\n\n>>> from sklearn.cluster import kmeans_plusplus\n>>> import numpy as np\n>>> X = np.array([[1, 2], [1, 4], [1, 0],\n... [10, 2], [10, 4], [10, 0]])\n>>> centers, indices = kmeans_plusplus(X, n_clusters=2, random_state=0)\n>>> centers\narray([[10, 4],\n [ 1, 0]])\n>>> indices\narray([4, 2])"
- }
- ]
- },
- {
- "name": "sklearn.cluster._mean_shift",
- "imports": [
- "import numpy as np",
- "import warnings",
- "from joblib import Parallel",
- "from collections import defaultdict",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _deprecate_positional_args",
- "from utils.fixes import delayed",
- "from utils import check_random_state",
- "from utils import gen_batches",
- "from utils import check_array",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from neighbors import NearestNeighbors",
- "from metrics.pairwise import pairwise_distances_argmin",
- "from _config import config_context"
- ],
- "classes": [
- {
- "name": "MeanShift",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "bandwidth",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Bandwidth used in the RBF kernel. If not given, the bandwidth is estimated using sklearn.cluster.estimate_bandwidth; see the documentation for that function for hints on scalability (see also the Notes, below)."
- },
- {
- "name": "seeds",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Seeds used to initialize kernels. If not set, the seeds are calculated by clustering.get_bin_seeds with bandwidth as the grid size and default values for other parameters."
- },
- {
- "name": "bin_seeding",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. The default value is False. Ignored if seeds argument is not None."
- },
- {
- "name": "min_bin_freq",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds."
- },
- {
- "name": "cluster_all",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet. .. versionadded:: 0.22"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples to cluster."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform clustering.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to cluster.\n\ny : Ignored"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data to predict."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the closest cluster each sample in X belongs to.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n New data to predict.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Index of the cluster each sample belongs to."
- }
- ],
- "docstring": "Mean shift clustering using a flat kernel.\n\nMean shift clustering aims to discover \"blobs\" in a smooth density of\nsamples. It is a centroid-based algorithm, which works by updating\ncandidates for centroids to be the mean of the points within a given\nregion. These candidates are then filtered in a post-processing stage to\neliminate near-duplicates to form the final set of centroids.\n\nSeeding is performed using a binning technique for scalability.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nbandwidth : float, default=None\n Bandwidth used in the RBF kernel.\n\n If not given, the bandwidth is estimated using\n sklearn.cluster.estimate_bandwidth; see the documentation for that\n function for hints on scalability (see also the Notes, below).\n\nseeds : array-like of shape (n_samples, n_features), default=None\n Seeds used to initialize kernels. If not set,\n the seeds are calculated by clustering.get_bin_seeds\n with bandwidth as the grid size and default values for\n other parameters.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n The default value is False.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. 
This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncluster_centers_ : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point.\n\nn_iter_ : int\n Maximum number of iterations performed on each seed.\n\n .. versionadded:: 0.22\n\nExamples\n--------\n>>> from sklearn.cluster import MeanShift\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = MeanShift(bandwidth=2).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering.predict([[0, 0], [5, 5]])\narray([1, 0])\n>>> clustering\nMeanShift(bandwidth=2)\n\nNotes\n-----\n\nScalability:\n\nBecause this implementation uses a flat kernel and\na Ball Tree to look up members of each kernel, the complexity will tend\ntowards O(T*n*log(n)) in lower dimensions, with n the number of samples\nand T the number of points. In higher dimensions the complexity will\ntend towards O(T*n^2).\n\nScalability can be boosted by using fewer seeds, for example by using\na higher value of min_bin_freq in the get_bin_seeds function.\n\nNote that the estimate_bandwidth function is much less scalable than the\nmean shift algorithm and will be the bottleneck if it is used.\n\nReferences\n----------\n\nDorin Comaniciu and Peter Meer, \"Mean Shift: A robust approach toward\nfeature space analysis\". IEEE Transactions on Pattern Analysis and\nMachine Intelligence. 2002. pp. 603-619."
- }
- ],
- "functions": [
- {
- "name": "estimate_bandwidth",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input points."
- },
- {
- "name": "quantile",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "should be between [0, 1] 0.5 means that the median of all pairwise distances is used."
- },
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples to use. If not given, all samples are used."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The generator used to randomly select the samples from input points for bandwidth estimation. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimate the bandwidth to use with the mean-shift algorithm.\n\nThat this function takes time at least quadratic in n_samples. For large\ndatasets, it's wise to set that parameter to a small value.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input points.\n\nquantile : float, default=0.3\n should be between [0, 1]\n 0.5 means that the median of all pairwise distances is used.\n\nn_samples : int, default=None\n The number of samples to use. If not given, all samples are used.\n\nrandom_state : int, RandomState instance, default=None\n The generator used to randomly select the samples from input points\n for bandwidth estimation. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nbandwidth : float\n The bandwidth parameter."
- },
- {
- "name": "_mean_shift_single_seed",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "mean_shift",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data."
- },
- {
- "name": "bandwidth",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Kernel bandwidth. If bandwidth is not given, it is determined using a heuristic based on the median of all pairwise distances. This will take quadratic time in the number of samples. The sklearn.cluster.estimate_bandwidth function can be used to do this more efficiently."
- },
- {
- "name": "seeds",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Point used as initial kernel locations. If None and bin_seeding=False, each data point is used as a seed. If None and bin_seeding=True, see bin_seeding."
- },
- {
- "name": "bin_seeding",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized. Ignored if seeds argument is not None."
- },
- {
- "name": "min_bin_freq",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "To speed up the algorithm, accept only those bins with at least min_bin_freq points as seeds."
- },
- {
- "name": "cluster_all",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If true, then all points are clustered, even those orphans that are not within any kernel. Orphans are assigned to the nearest kernel. If false, then orphans are given cluster label -1."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "300",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations, per seed point before the clustering operation terminates (for that seed point), if has not converged yet."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.17 Parallel Execution using *n_jobs*."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform mean shift clustering of data using a flat kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nbandwidth : float, default=None\n Kernel bandwidth.\n\n If bandwidth is not given, it is determined using a heuristic based on\n the median of all pairwise distances. This will take quadratic time in\n the number of samples. The sklearn.cluster.estimate_bandwidth function\n can be used to do this more efficiently.\n\nseeds : array-like of shape (n_seeds, n_features) or None\n Point used as initial kernel locations. If None and bin_seeding=False,\n each data point is used as a seed. If None and bin_seeding=True,\n see bin_seeding.\n\nbin_seeding : bool, default=False\n If true, initial kernel locations are not locations of all\n points, but rather the location of the discretized version of\n points, where points are binned onto a grid whose coarseness\n corresponds to the bandwidth. Setting this option to True will speed\n up the algorithm because fewer seeds will be initialized.\n Ignored if seeds argument is not None.\n\nmin_bin_freq : int, default=1\n To speed up the algorithm, accept only those bins with at least\n min_bin_freq points as seeds.\n\ncluster_all : bool, default=True\n If true, then all points are clustered, even those orphans that are\n not within any kernel. Orphans are assigned to the nearest kernel.\n If false, then orphans are given cluster label -1.\n\nmax_iter : int, default=300\n Maximum number of iterations, per seed point before the clustering\n operation terminates (for that seed point), if has not converged yet.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by computing\n each of the n_init runs in parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. 
versionadded:: 0.17\n Parallel Execution using *n_jobs*.\n\nReturns\n-------\n\ncluster_centers : ndarray of shape (n_clusters, n_features)\n Coordinates of cluster centers.\n\nlabels : ndarray of shape (n_samples,)\n Cluster labels for each point.\n\nNotes\n-----\nFor an example, see :ref:`examples/cluster/plot_mean_shift.py\n`."
- },
- {
- "name": "get_bin_seeds",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input points, the same points that will be used in mean_shift."
- },
- {
- "name": "bin_size",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Controls the coarseness of the binning. Smaller values lead to more seeding (which is computationally more expensive). If you're not sure how to set this, set it to the value of the bandwidth used in clustering.mean_shift."
- },
- {
- "name": "min_bin_freq",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Only bins with at least min_bin_freq will be selected as seeds. Raising this value decreases the number of seeds found, which makes mean_shift computationally cheaper."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Finds seeds for mean_shift.\n\nFinds seeds by first binning data onto a grid whose lines are\nspaced bin_size apart, and then choosing those bins with at least\nmin_bin_freq points.\n\nParameters\n----------\n\nX : array-like of shape (n_samples, n_features)\n Input points, the same points that will be used in mean_shift.\n\nbin_size : float\n Controls the coarseness of the binning. Smaller values lead\n to more seeding (which is computationally more expensive). If you're\n not sure how to set this, set it to the value of the bandwidth used\n in clustering.mean_shift.\n\nmin_bin_freq : int, default=1\n Only bins with at least min_bin_freq will be selected as seeds.\n Raising this value decreases the number of seeds found, which\n makes mean_shift computationally cheaper.\n\nReturns\n-------\nbin_seeds : array-like of shape (n_samples, n_features)\n Points used as initial kernel positions in clustering.mean_shift."
- }
- ]
- },
- {
- "name": "sklearn.cluster._optics",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from utils import gen_batches",
- "from utils import get_chunk_n_rows",
- "from utils.validation import _deprecate_positional_args",
- "from neighbors import NearestNeighbors",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from metrics import pairwise_distances"
- ],
- "classes": [
- {
- "name": "OPTICS",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "min_samples",
- "type": null,
- "hasDefault": true,
- "default": "5",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Also, up and down steep regions can't have more than ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)."
- },
- {
- "name": "max_eps",
- "type": "float",
- "hasDefault": true,
- "default": "np",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times."
- },
- {
- "name": "metric",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'minkowski'",
- "limitation": null,
- "ignored": false,
- "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics."
- },
- {
- "name": "p",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used."
- },
- {
- "name": "metric_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional keyword arguments for the metric function."
- },
- {
- "name": "cluster_method",
- "type": "str",
- "hasDefault": true,
- "default": "'xi'",
- "limitation": null,
- "ignored": false,
- "docstring": "The extraction method used to extract clusters using the calculated reachability and ordering. Possible values are \"xi\" and \"dbscan\"."
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. By default it assumes the same value as ``max_eps``. Used only when ``cluster_method='dbscan'``."
- },
- {
- "name": "xi",
- "type": null,
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi. Used only when ``cluster_method='xi'``."
- },
- {
- "name": "predecessor_correction",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Correct clusters according to the predecessors calculated by OPTICS [2]_. This parameter has minimal effect on most datasets. Used only when ``cluster_method='xi'``."
- },
- {
- "name": "min_cluster_size",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead. Used only when ``cluster_method='xi'``."
- },
- {
- "name": "algorithm",
- "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force."
- },
- {
- "name": "leaf_size",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A feature array, or array of distances between samples if metric='precomputed'."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Ignored."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Perform OPTICS clustering.\n\nExtracts an ordered list of points and reachability distances, and\nperforms initial clustering using ``max_eps`` distance specified at\nOPTICS object instantiation.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019\n A feature array, or array of distances between samples if\n metric='precomputed'.\n\ny : ignored\n Ignored.\n\nReturns\n-------\nself : instance of OPTICS\n The instance."
- }
- ],
- "docstring": "Estimate clustering structure from vector array.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely\nrelated to DBSCAN, finds core sample of high density and expands clusters\nfrom them [1]_. Unlike DBSCAN, keeps cluster hierarchy for a variable\nneighborhood radius. Better suited for usage on large datasets than the\ncurrent sklearn implementation of DBSCAN.\n\nClusters are then extracted using a DBSCAN-like method\n(cluster_method = 'dbscan') or an automatic\ntechnique proposed in [1]_ (cluster_method = 'xi').\n\nThis implementation deviates from the original OPTICS by first performing\nk-nearest-neighborhood searches on all points to identify core sizes, then\ncomputing only the distances to unprocessed points when constructing the\ncluster order. Note that we do not employ a heap to manage the expansion\ncandidates, so the time complexity will be O(n^2).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nmin_samples : int > 1 or float between 0 and 1, default=5\n The number of samples in a neighborhood for a point to be considered as\n a core point. Also, up and down steep regions can't have more than\n ``min_samples`` consecutive non-steep points. Expressed as an absolute\n number or a fraction of the number of samples (rounded to be at least\n 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. 
This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\ncluster_method : str, default='xi'\n The extraction method used to extract clusters using the calculated\n reachability and ordering. Possible values are \"xi\" and \"dbscan\".\n\neps : float, default=None\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. By default it assumes the same value\n as ``max_eps``.\n Used only when ``cluster_method='dbscan'``.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n Used only when ``cluster_method='xi'``.\n\npredecessor_correction : bool, default=True\n Correct clusters according to the predecessors calculated by OPTICS\n [2]_. 
This parameter has minimal effect on most datasets.\n Used only when ``cluster_method='xi'``.\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n Used only when ``cluster_method='xi'``.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nAttributes\n----------\nlabels_ : ndarray of shape (n_samples,)\n Cluster labels for each point in the dataset given to fit().\n Noisy samples and points which are not included in a leaf cluster\n of ``cluster_hierarchy_`` are labeled as -1.\n\nreachability_ : ndarray of shape (n_samples,)\n Reachability distances per sample, indexed by object order. 
Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\nordering_ : ndarray of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : ndarray of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\ncluster_hierarchy_ : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to\n ``(end, -start)`` (ascending) so that larger clusters encompassing\n smaller clusters come after those smaller ones. Since ``labels_`` does\n not reflect the hierarchy, usually\n ``len(cluster_hierarchy_) > np.unique(optics.labels_)``. Please also\n note that these indices are of the ``ordering_``, i.e.\n ``X[ordering_][start:end + 1]`` form a cluster.\n Only available when ``cluster_method='xi'``.\n\nSee Also\n--------\nDBSCAN : A similar clustering for a specified neighborhood radius (eps).\n Our implementation is optimized for runtime.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60.\n\n.. [2] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.\n\nExamples\n--------\n>>> from sklearn.cluster import OPTICS\n>>> import numpy as np\n>>> X = np.array([[1, 2], [2, 5], [3, 6],\n... 
[8, 7], [8, 8], [7, 3]])\n>>> clustering = OPTICS(min_samples=2).fit(X)\n>>> clustering.labels_\narray([0, 0, 0, 1, 1, 1])"
- }
- ],
- "functions": [
- {
- "name": "_validate_size",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_compute_core_distances_",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data."
- },
- {
- "name": "neighbors",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The fitted nearest neighbors estimator."
- },
- {
- "name": "working_memory",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute the k-th nearest neighbor of each sample\n\nEquivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1]\nbut with more memory efficiency.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\nneighbors : NearestNeighbors instance\n The fitted nearest neighbors estimator.\nworking_memory : int, default=None\n The sought maximum memory for temporary distance matrix chunks.\n When None (default), the value of\n ``sklearn.get_config()['working_memory']`` is used.\n\nReturns\n-------\ncore_distances : ndarray of shape (n_samples,)\n Distance at which each sample becomes a core point.\n Points which will never be core have a distance of inf."
- },
- {
- "name": "compute_optics_graph",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A feature array, or array of distances between samples if metric='precomputed'"
- },
- {
- "name": "min_samples",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples in a neighborhood for a point to be considered as a core point. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)."
- },
- {
- "name": "max_eps",
- "type": "float",
- "hasDefault": true,
- "default": "np",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum distance between two samples for one to be considered as in the neighborhood of the other. Default value of ``np.inf`` will identify clusters across all scales; reducing ``max_eps`` will result in shorter run times."
- },
- {
- "name": "metric",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'minkowski'",
- "limitation": null,
- "ignored": false,
- "docstring": "Metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. If metric is a callable function, it is called on each pair of instances (rows) and the resulting value recorded. The callable should take two arrays as input and return one value indicating the distance between them. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. If metric is \"precomputed\", X is assumed to be a distance matrix and must be square. Valid values for metric are: - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan'] - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] See the documentation for scipy.spatial.distance for details on these metrics."
- },
- {
- "name": "p",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used."
- },
- {
- "name": "metric_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Additional keyword arguments for the metric function."
- },
- {
- "name": "algorithm",
- "type": "Literal['auto', 'ball_tree', 'kd_tree', 'brute']",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "Algorithm used to compute the nearest neighbors: - 'ball_tree' will use :class:`BallTree` - 'kd_tree' will use :class:`KDTree` - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force."
- },
- {
- "name": "leaf_size",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run for neighbors search. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the OPTICS reachability graph.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features), or (n_samples, n_samples) if metric=\u2019precomputed\u2019.\n A feature array, or array of distances between samples if\n metric='precomputed'\n\nmin_samples : int > 1 or float between 0 and 1\n The number of samples in a neighborhood for a point to be considered\n as a core point. Expressed as an absolute number or a fraction of the\n number of samples (rounded to be at least 2).\n\nmax_eps : float, default=np.inf\n The maximum distance between two samples for one to be considered as\n in the neighborhood of the other. Default value of ``np.inf`` will\n identify clusters across all scales; reducing ``max_eps`` will result\n in shorter run times.\n\nmetric : str or callable, default='minkowski'\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string. If metric is\n \"precomputed\", X is assumed to be a distance matrix and must be square.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\np : int, default=2\n Parameter for the Minkowski metric from\n :class:`~sklearn.metrics.pairwise_distances`. 
When p = 1, this is\n equivalent to using manhattan_distance (l1), and euclidean_distance\n (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n\nmetric_params : dict, default=None\n Additional keyword arguments for the metric function.\n\nalgorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'\n Algorithm used to compute the nearest neighbors:\n\n - 'ball_tree' will use :class:`BallTree`\n - 'kd_tree' will use :class:`KDTree`\n - 'brute' will use a brute-force search.\n - 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\n Note: fitting on sparse input will override the setting of\n this parameter, using brute force.\n\nleaf_size : int, default=30\n Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can\n affect the speed of the construction and query, as well as the memory\n required to store the tree. The optimal value depends on the\n nature of the problem.\n\nn_jobs : int, default=None\n The number of parallel jobs to run for neighbors search.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nReturns\n-------\nordering_ : array of shape (n_samples,)\n The cluster ordered list of sample indices.\n\ncore_distances_ : array of shape (n_samples,)\n Distance at which each sample becomes a core point, indexed by object\n order. Points which will never be core have a distance of inf. Use\n ``clust.core_distances_[clust.ordering_]`` to access in cluster order.\n\nreachability_ : array of shape (n_samples,)\n Reachability distances per sample, indexed by object order. Use\n ``clust.reachability_[clust.ordering_]`` to access in cluster order.\n\npredecessor_ : array of shape (n_samples,)\n Point that a sample was reached from, indexed by object order.\n Seed points have a predecessor of -1.\n\nReferences\n----------\n.. [1] Ankerst, Mihael, Markus M. 
Breunig, Hans-Peter Kriegel,\n and J\u00f6rg Sander. \"OPTICS: ordering points to identify the clustering\n structure.\" ACM SIGMOD Record 28, no. 2 (1999): 49-60."
- },
- {
- "name": "_set_reach_dist",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "cluster_optics_dbscan",
- "decorators": [],
- "parameters": [
- {
- "name": "reachability",
- "type": "Array",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Reachability distances calculated by OPTICS (``reachability_``)"
- },
- {
- "name": "core_distances",
- "type": "Array",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Distances at which points become core (``core_distances_``)"
- },
- {
- "name": "ordering",
- "type": "Array",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "OPTICS ordered point indices (``ordering_``)"
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close to one another."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Performs DBSCAN extraction for an arbitrary epsilon.\n\nExtracting the clusters runs in linear time. Note that this results in\n``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with\nsimilar settings and ``eps``, only if ``eps`` is close to ``max_eps``.\n\nParameters\n----------\nreachability : array of shape (n_samples,)\n Reachability distances calculated by OPTICS (``reachability_``)\n\ncore_distances : array of shape (n_samples,)\n Distances at which points become core (``core_distances_``)\n\nordering : array of shape (n_samples,)\n OPTICS ordered point indices (``ordering_``)\n\neps : float\n DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results\n will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close\n to one another.\n\nReturns\n-------\nlabels_ : array of shape (n_samples,)\n The estimated labels."
- },
- {
- "name": "cluster_optics_xi",
- "decorators": [],
- "parameters": [
- {
- "name": "reachability",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Reachability distances calculated by OPTICS (`reachability_`)"
- },
- {
- "name": "predecessor",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Predecessors calculated by OPTICS."
- },
- {
- "name": "ordering",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "OPTICS ordered point indices (`ordering_`)"
- },
- {
- "name": "min_samples",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points. Expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2)."
- },
- {
- "name": "min_cluster_size",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Minimum number of samples in an OPTICS cluster, expressed as an absolute number or a fraction of the number of samples (rounded to be at least 2). If ``None``, the value of ``min_samples`` is used instead."
- },
- {
- "name": "xi",
- "type": null,
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi."
- },
- {
- "name": "predecessor_correction",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Correct clusters based on the calculated predecessors."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nParameters\n----------\nreachability : ndarray of shape (n_samples,)\n Reachability distances calculated by OPTICS (`reachability_`)\n\npredecessor : ndarray of shape (n_samples,)\n Predecessors calculated by OPTICS.\n\nordering : ndarray of shape (n_samples,)\n OPTICS ordered point indices (`ordering_`)\n\nmin_samples : int > 1 or float between 0 and 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n Expressed as an absolute number or a fraction of the number of samples\n (rounded to be at least 2).\n\nmin_cluster_size : int > 1 or float between 0 and 1, default=None\n Minimum number of samples in an OPTICS cluster, expressed as an\n absolute number or a fraction of the number of samples (rounded to be\n at least 2). If ``None``, the value of ``min_samples`` is used instead.\n\nxi : float between 0 and 1, default=0.05\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\npredecessor_correction : bool, default=True\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n The labels assigned to samples. Points which are not included\n in any cluster are labeled as -1.\n\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of ``[start, end]`` in each row, with\n all indices inclusive. The clusters are ordered according to ``(end,\n -start)`` (ascending) so that larger clusters encompassing smaller\n clusters come after such nested smaller clusters. Since ``labels`` does\n not reflect the hierarchy, usually ``len(clusters) >\n np.unique(labels)``."
- },
- {
- "name": "_extend_region",
- "decorators": [],
- "parameters": [
- {
- "name": "steep_point",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True if the point is steep downward (upward)."
- },
- {
- "name": "xward_point",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True if the point is an upward (respectively downward) point."
- },
- {
- "name": "start",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The start of the xward region."
- },
- {
- "name": "min_samples",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.\n\nParameters\n----------\nsteep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\nxward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\nstart : int\n The start of the xward region.\n\nmin_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more then ``min_samples`` consecutive non-steep\n points.\n\nReturns\n-------\nindex : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\nend : int\n The end of the region, which can be behind the index. The region\n includes the ``end`` index."
- },
- {
- "name": "_update_filter_sdas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Update steep down areas (SDAs) using the new maximum in between (mib)\nvalue, and the given complement of xi, i.e. ``1 - xi``."
- },
- {
- "name": "_correct_predecessor",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329."
- },
- {
- "name": "_xi_cluster",
- "decorators": [],
- "parameters": [
- {
- "name": "reachability_plot",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The reachability plot, i.e. reachability ordered according to the calculated ordering, all computed by OPTICS."
- },
- {
- "name": "predecessor_plot",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Predecessors ordered according to the calculated ordering."
- },
- {
- "name": "xi",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the minimum steepness on the reachability plot that constitutes a cluster boundary. For example, an upwards point in the reachability plot is defined by the ratio from one point to its successor being at most 1-xi."
- },
- {
- "name": "min_samples",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The same as the min_samples given to OPTICS. Up and down steep regions can't have more then ``min_samples`` consecutive non-steep points."
- },
- {
- "name": "min_cluster_size",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Minimum number of samples in an OPTICS cluster."
- },
- {
- "name": "predecessor_correction",
- "type": "bool",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Correct clusters based on the calculated predecessors."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Automatically extract clusters according to the Xi-steep method.\n\nThis is rouphly an implementation of Figure 19 of the OPTICS paper.\n\nParameters\n----------\nreachability_plot : array-like of shape (n_samples,)\n The reachability plot, i.e. reachability ordered according to\n the calculated ordering, all computed by OPTICS.\n\npredecessor_plot : array-like of shape (n_samples,)\n Predecessors ordered according to the calculated ordering.\n\nxi : float, between 0 and 1\n Determines the minimum steepness on the reachability plot that\n constitutes a cluster boundary. For example, an upwards point in the\n reachability plot is defined by the ratio from one point to its\n successor being at most 1-xi.\n\nmin_samples : int > 1\n The same as the min_samples given to OPTICS. Up and down steep regions\n can't have more then ``min_samples`` consecutive non-steep points.\n\nmin_cluster_size : int > 1\n Minimum number of samples in an OPTICS cluster.\n\npredecessor_correction : bool\n Correct clusters based on the calculated predecessors.\n\nReturns\n-------\nclusters : ndarray of shape (n_clusters, 2)\n The list of clusters in the form of [start, end] in each row, with all\n indices inclusive. The clusters are ordered in a way that larger\n clusters encompassing smaller clusters come after those smaller\n clusters."
- },
- {
- "name": "_extract_xi_labels",
- "decorators": [],
- "parameters": [
- {
- "name": "ordering",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The ordering of points calculated by OPTICS"
- },
- {
- "name": "clusters",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of clusters i.e. (start, end) tuples, as returned by `_xi_cluster`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Extracts the labels from the clusters returned by `_xi_cluster`.\nWe rely on the fact that clusters are stored\nwith the smaller clusters coming before the larger ones.\n\nParameters\n----------\nordering : array-like of shape (n_samples,)\n The ordering of points calculated by OPTICS\n\nclusters : array-like of shape (n_clusters, 2)\n List of clusters i.e. (start, end) tuples,\n as returned by `_xi_cluster`.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)"
- }
- ]
- },
- {
- "name": "sklearn.cluster._spectral",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from base import BaseEstimator",
- "from base import ClusterMixin",
- "from utils import check_random_state",
- "from utils import as_float_array",
- "from utils.validation import _deprecate_positional_args",
- "from utils.deprecation import deprecated",
- "from metrics.pairwise import pairwise_kernels",
- "from neighbors import kneighbors_graph",
- "from neighbors import NearestNeighbors",
- "from manifold import spectral_embedding",
- "from _kmeans import k_means",
- "from scipy.sparse import csc_matrix",
- "from scipy.linalg import LinAlgError"
- ],
- "classes": [
- {
- "name": "SpectralClustering",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "8",
- "limitation": null,
- "ignored": false,
- "docstring": "The dimension of the projection subspace."
- },
- {
- "name": "eigen_solver",
- "type": "Literal['arpack', 'lobpcg', 'amg']",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "n_clusters",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of eigen vectors to use for the spectral embedding"
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia."
- },
- {
- "name": "gamma",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels. Ignored for ``affinity='nearest_neighbors'``."
- },
- {
- "name": "affinity",
- "type": "Union[Callable, str]",
- "hasDefault": true,
- "default": "'rbf'",
- "limitation": null,
- "ignored": false,
- "docstring": "How to construct the affinity matrix. - 'nearest_neighbors' : construct the affinity matrix by computing a graph of nearest neighbors. - 'rbf' : construct the affinity matrix using a radial basis function (RBF) kernel. - 'precomputed' : interpret ``X`` as a precomputed affinity matrix. - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph of precomputed nearest neighbors, and constructs the affinity matrix by selecting the ``n_neighbors`` nearest neighbors. - one of the kernels supported by :func:`~sklearn.metrics.pairwise_kernels`. Only kernels that produce similarity scores (non-negative values that increase with similarity) should be used. This property is not checked by the clustering algorithm."
- },
- {
- "name": "n_neighbors",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for ``affinity='rbf'``."
- },
- {
- "name": "eigen_tol",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when ``eigen_solver='arpack'``."
- },
- {
- "name": "assign_labels",
- "type": "Literal['kmeans', 'discretize']",
- "hasDefault": true,
- "default": "'kmeans'",
- "limitation": null,
- "ignored": false,
- "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization."
- },
- {
- "name": "degree",
- "type": "float",
- "hasDefault": true,
- "default": "3",
- "limitation": null,
- "ignored": false,
- "docstring": "Degree of the polynomial kernel. Ignored by other kernels."
- },
- {
- "name": "coef0",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Zero coefficient for polynomial and sigmoid kernels. Ignored by other kernels."
- },
- {
- "name": "kernel_params",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters (keyword arguments) and values for kernel passed as callable object. Ignored by other kernels."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of parallel jobs to run when `affinity='nearest_neighbors'` or `affinity='precomputed_nearest_neighbors'`. The neighbors search will be done in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Perform spectral clustering from features, or affinity matrix.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nself"
- },
- {
- "name": "fit_predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training instances to cluster, or similarities / affinities between instances if ``affinity='precomputed'``. If a sparse matrix is provided in a format other than ``csr_matrix``, ``csc_matrix``, or ``coo_matrix``, it will be converted into a sparse ``csr_matrix``."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present here for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform spectral clustering from features, or affinity matrix,\nand return cluster labels.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features), or array-like of shape (n_samples, n_samples)\n Training instances to cluster, or similarities / affinities between\n instances if ``affinity='precomputed'``. If a sparse matrix is\n provided in a format other than ``csr_matrix``, ``csc_matrix``,\n or ``coo_matrix``, it will be converted into a sparse\n ``csr_matrix``.\n\ny : Ignored\n Not used, present here for API consistency by convention.\n\nReturns\n-------\nlabels : ndarray of shape (n_samples,)\n Cluster labels."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_pairwise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\nkernel function such the Gaussian (aka RBF) kernel of the euclidean\ndistanced ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, using ``precomputed``, a user-provided affinity\nmatrix can be used.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\ngamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\nn_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another approach\n which is less sensitive to random initialization.\n\ndegree : float, default=3\n Degree of the polynomial kernel. 
Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only if after calling\n ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n... assign_labels=\"discretize\",\n... random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)\n\nNotes\n-----\nIf you have an affinity matrix, such as a distance matrix,\nfor which 0 means identical elements, and high values means\nvery dissimilar elements, it can be transformed in a\nsimilarity matrix that is well suited for the algorithm by\napplying the Gaussian (RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. 
* delta ** 2))\n\nWhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAnother alternative is to take a symmetric version of the k\nnearest neighbors connectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf"
- }
- ],
- "functions": [
- {
- "name": "discretize",
- "decorators": [],
- "parameters": [
- {
- "name": "vectors",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The embedding space of the samples."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy vectors, or perform in-place normalization."
- },
- {
- "name": "max_svd_restarts",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of attempts to restart SVD if convergence fails"
- },
- {
- "name": "n_iter_max",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations to attempt in rotation and partition matrix search if machine precision convergence is not reached"
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for rotation matrix initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Search for a partition matrix (clustering) which is closest to the\neigenvector embedding.\n\nParameters\n----------\nvectors : array-like of shape (n_samples, n_clusters)\n The embedding space of the samples.\n\ncopy : bool, default=True\n Whether to copy vectors, or perform in-place normalization.\n\nmax_svd_restarts : int, default=30\n Maximum number of attempts to restart SVD if convergence fails\n\nn_iter_max : int, default=30\n Maximum number of iterations to attempt in rotation and partition\n matrix search if machine precision convergence is not reached\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for rotation matrix initialization.\n Use an int to make the randomness deterministic.\n See :term:`Glossary `.\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\n\nThe eigenvector embedding is used to iteratively search for the\nclosest discrete partition. First, the eigenvector embedding is\nnormalized to the space of partition matrices. An optimal discrete\npartition matrix closest to this normalized embedding multiplied by\nan initial rotation is calculated. Fixing this discrete partition\nmatrix, an optimal rotation matrix is calculated. These two\ncalculations are performed until convergence. The discrete partition\nmatrix is returned as the clustering solution. Used in spectral\nclustering, this method tends to be faster and more robust to random\ninitialization than k-means."
- },
- {
- "name": "spectral_clustering",
- "decorators": [],
- "parameters": [
- {
- "name": "affinity",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The affinity matrix describing the relationship of the samples to embed. **Must be symmetric**. Possible examples: - adjacency matrix of a graph, - heat kernel of the pairwise distance matrix of the samples, - symmetric k-nearest neighbours connectivity matrix of the samples."
- },
- {
- "name": "n_clusters",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of clusters to extract."
- },
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "n_clusters",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of eigen vectors to use for the spectral embedding"
- },
- {
- "name": "eigen_solver",
- "type": "Optional[Literal['arpack', 'lobpcg']]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The eigenvalue decomposition strategy to use. AMG requires pyamg to be installed. It can be faster on very large, sparse problems, but may also lead to instabilities. If None, then ``'arpack'`` is used."
- },
- {
- "name": "random_state",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A pseudo random number generator used for the initialization of the lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by the K-Means initialization. Use an int to make the randomness deterministic. See :term:`Glossary `."
- },
- {
- "name": "n_init",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia."
- },
- {
- "name": "eigen_tol",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Stopping criterion for eigendecomposition of the Laplacian matrix when using arpack eigen_solver."
- },
- {
- "name": "assign_labels",
- "type": "Literal['kmeans', 'discretize']",
- "hasDefault": true,
- "default": "'kmeans'",
- "limitation": null,
- "ignored": false,
- "docstring": "The strategy to use to assign labels in the embedding space. There are two ways to assign labels after the laplacian embedding. k-means can be applied and is a popular choice. But it can also be sensitive to initialization. Discretization is another approach which is less sensitive to random initialization. See the 'Multiclass spectral clustering' paper referenced below for more details on the discretization approach."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbosity mode. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster. For instance, when clusters are\nnested circles on the 2D plane.\n\nIf affinity is the adjacency matrix of a graph, this method can be\nused to find normalized graph cuts.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\naffinity : {array-like, sparse matrix} of shape (n_samples, n_samples)\n The affinity matrix describing the relationship of the samples to\n embed. **Must be symmetric**.\n\n Possible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples.\n\nn_clusters : int, default=None\n Number of clusters to extract.\n\nn_components : int, default=n_clusters\n Number of eigen vectors to use for the spectral embedding\n\neigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used.\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization of the\n lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by\n the K-Means initialization. Use an int to make the randomness\n deterministic.\n See :term:`Glossary `.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. 
The final results will be the best output of\n n_init consecutive runs in terms of inertia.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when using arpack eigen_solver.\n\nassign_labels : {'kmeans', 'discretize'}, default='kmeans'\n The strategy to use to assign labels in the embedding\n space. There are two ways to assign labels after the laplacian\n embedding. k-means can be applied and is a popular choice. But it can\n also be sensitive to initialization. Discretization is another\n approach which is less sensitive to random initialization. See\n the 'Multiclass spectral clustering' paper referenced below for\n more details on the discretization approach.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nReturns\n-------\nlabels : array of integers, shape: n_samples\n The labels of the clusters.\n\nReferences\n----------\n\n- Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324\n\n- A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323\n\n- Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf\n\nNotes\n-----\nThe graph should contain only one connect component, elsewhere\nthe results make little sense.\n\nThis algorithm solves the normalized cut for k=2: it is a\nnormalized spectral clustering."
- }
- ]
- },
- {
- "name": "sklearn.cluster",
- "imports": [
- "from _spectral import spectral_clustering",
- "from _spectral import SpectralClustering",
- "from _mean_shift import mean_shift",
- "from _mean_shift import MeanShift",
- "from _mean_shift import estimate_bandwidth",
- "from _mean_shift import get_bin_seeds",
- "from _affinity_propagation import affinity_propagation",
- "from _affinity_propagation import AffinityPropagation",
- "from _agglomerative import ward_tree",
- "from _agglomerative import AgglomerativeClustering",
- "from _agglomerative import linkage_tree",
- "from _agglomerative import FeatureAgglomeration",
- "from _kmeans import k_means",
- "from _kmeans import KMeans",
- "from _kmeans import MiniBatchKMeans",
- "from _kmeans import kmeans_plusplus",
- "from _dbscan import dbscan",
- "from _dbscan import DBSCAN",
- "from _optics import OPTICS",
- "from _optics import cluster_optics_dbscan",
- "from _optics import compute_optics_graph",
- "from _optics import cluster_optics_xi",
- "from _bicluster import SpectralBiclustering",
- "from _bicluster import SpectralCoclustering",
- "from _birch import Birch"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.cluster.tests.common",
- "imports": [
- "import numpy as np"
- ],
- "classes": [],
- "functions": [
- {
- "name": "generate_clustered_data",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_affinity_propagation",
- "imports": [
- "import numpy as np",
- "import pytest",
- "from scipy.sparse import csr_matrix",
- "from sklearn.exceptions import ConvergenceWarning",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_warns",
- "from sklearn.utils._testing import assert_warns_message",
- "from sklearn.utils._testing import assert_no_warnings",
- "from sklearn.cluster import AffinityPropagation",
- "from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences",
- "from sklearn.cluster import affinity_propagation",
- "from sklearn.datasets import make_blobs",
- "from sklearn.metrics import euclidean_distances"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_affinity_propagation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_predict_error",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_fit_non_convergence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_equal_mutual_similarities",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_predict_non_convergence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_non_convergence_regressiontest",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_equal_similarities_and_preferences",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_random_state",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_random_state_warning",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_convergence_warning_dense_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Non-regression, see #13334"
- },
- {
- "name": "test_affinity_propagation_float32",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_propagation_pairwise_is_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_bicluster",
- "imports": [
- "import numpy as np",
- "import pytest",
- "from scipy.sparse import csr_matrix",
- "from scipy.sparse import issparse",
- "from sklearn.model_selection import ParameterGrid",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.base import BaseEstimator",
- "from sklearn.base import BiclusterMixin",
- "from sklearn.cluster import SpectralCoclustering",
- "from sklearn.cluster import SpectralBiclustering",
- "from sklearn.cluster._bicluster import _scale_normalize",
- "from sklearn.cluster._bicluster import _bistochastic_normalize",
- "from sklearn.cluster._bicluster import _log_normalize",
- "from sklearn.metrics import consensus_score",
- "from sklearn.metrics import v_measure_score",
- "from sklearn.datasets import make_biclusters",
- "from sklearn.datasets import make_checkerboard"
- ],
- "classes": [
- {
- "name": "MockBiclustering",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "get_indices",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- }
- ],
- "functions": [
- {
- "name": "test_get_submatrix",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_test_shape_indices",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_coclustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_biclustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_do_scale_test",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Check that rows sum to one constant, and columns to another."
- },
- {
- "name": "_do_bistochastic_test",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Check that rows and columns sum to the same constant."
- },
- {
- "name": "test_scale_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_bistochastic_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_log_normalize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_fit_best_piecewise",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_project_and_cluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_perfect_checkerboard",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_errors",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_wrong_shape",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_features_in_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_jobs_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_birch",
- "imports": [
- "from scipy import sparse",
- "import numpy as np",
- "import pytest",
- "from sklearn.cluster.tests.common import generate_clustered_data",
- "from sklearn.cluster import Birch",
- "from sklearn.cluster import AgglomerativeClustering",
- "from sklearn.datasets import make_blobs",
- "from sklearn.exceptions import ConvergenceWarning",
- "from sklearn.linear_model import ElasticNet",
- "from sklearn.metrics import pairwise_distances_argmin",
- "from sklearn.metrics import v_measure_score",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_warns"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_n_samples_leaves_roots",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_partial_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_birch_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_clusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sparse_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_partial_fit_second_call_error_checks",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "check_branching_factor",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_branching_factor",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "check_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Use the leaf linked list for traversal"
- },
- {
- "name": "test_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_birch_n_clusters_long_int",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_dbscan",
- "imports": [
- "import pickle",
- "import numpy as np",
- "import warnings",
- "from scipy.spatial import distance",
- "from scipy import sparse",
- "import pytest",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.neighbors import NearestNeighbors",
- "from sklearn.cluster import DBSCAN",
- "from sklearn.cluster import dbscan",
- "from sklearn.cluster.tests.common import generate_clustered_data",
- "from sklearn.metrics.pairwise import pairwise_distances"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_dbscan_similarity",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_feature",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_sparse_precomputed",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_sparse_precomputed_different_eps",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_input_not_modified",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_no_core_samples",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_callable",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_metric_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_balltree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_input_validation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_badargs",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_pickle",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_boundaries",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_weighted_dbscan",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_core_samples_toy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_precomputed_metric_with_degenerate_input_arrays",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_precomputed_metric_with_initial_rows_zero",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_feature_agglomeration",
- "imports": [
- "import numpy as np",
- "from sklearn.cluster import FeatureAgglomeration",
- "from sklearn.utils._testing import assert_no_warnings",
- "from sklearn.utils._testing import assert_array_almost_equal"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_feature_agglomeration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_hierarchical",
- "imports": [
- "from tempfile import mkdtemp",
- "import shutil",
- "import pytest",
- "from functools import partial",
- "import numpy as np",
- "from scipy import sparse",
- "from scipy.cluster import hierarchy",
- "from sklearn.metrics.cluster import adjusted_rand_score",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_raise_message",
- "from sklearn.utils._testing import ignore_warnings",
- "from sklearn.cluster import ward_tree",
- "from sklearn.cluster import AgglomerativeClustering",
- "from sklearn.cluster import FeatureAgglomeration",
- "from sklearn.cluster._agglomerative import _hc_cut",
- "from sklearn.cluster._agglomerative import _TREE_BUILDERS",
- "from sklearn.cluster._agglomerative import linkage_tree",
- "from sklearn.cluster._agglomerative import _fix_connectivity",
- "from sklearn.feature_extraction.image import grid_to_graph",
- "from sklearn.metrics.pairwise import PAIRED_DISTANCES",
- "from sklearn.metrics.pairwise import cosine_distances",
- "from sklearn.metrics.pairwise import manhattan_distances",
- "from sklearn.metrics.pairwise import pairwise_distances",
- "from sklearn.metrics.cluster import normalized_mutual_info_score",
- "from sklearn.neighbors import kneighbors_graph",
- "from sklearn.cluster._hierarchical_fast import average_merge",
- "from sklearn.cluster._hierarchical_fast import max_merge",
- "from sklearn.utils._fast_dict import IntFloatDict",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_warns",
- "from sklearn.datasets import make_moons",
- "from sklearn.datasets import make_circles"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_linkage_misc",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_structured_linkage_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_unstructured_linkage_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_height_linkage_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agglomerative_clustering_wrong_arg_memory",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_zero_cosine_linkage_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agglomerative_clustering_distances",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agglomerative_clustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ward_agglomeration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_single_linkage_clustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "assess_same_labelling",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Util for comparison with scipy"
- },
- {
- "name": "test_sparse_scikit_vs_scipy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_vector_scikit_single_vs_scipy_single",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_identical_points",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_connectivity_propagation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ward_tree_children_order",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ward_linkage_tree_return_distance",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_connectivity_fixing_non_lil",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_int_float_dict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_connectivity_callable",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_connectivity_ignores_diagonal",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_compute_full_tree",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_components",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agg_n_clusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinity_passed_to_fix_connectivity",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agglomerative_clustering_with_distance_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_small_distance_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_cluster_distances_with_distance_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_agglomerative_clustering_with_distance_threshold_edge_case",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dist_threshold_invalid_parameters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_invalid_shape_precomputed_dist_matrix",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_k_means",
- "imports": [
- "import re",
- "import sys",
- "import numpy as np",
- "from scipy import sparse as sp",
- "from threadpoolctl import threadpool_limits",
- "import pytest",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_allclose",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils.fixes import _astype_copy_false",
- "from sklearn.base import clone",
- "from sklearn.exceptions import ConvergenceWarning",
- "from sklearn.utils.extmath import row_norms",
- "from sklearn.metrics import pairwise_distances",
- "from sklearn.metrics import pairwise_distances_argmin",
- "from sklearn.metrics.cluster import v_measure_score",
- "from sklearn.cluster import KMeans",
- "from sklearn.cluster import k_means",
- "from sklearn.cluster import kmeans_plusplus",
- "from sklearn.cluster import MiniBatchKMeans",
- "from sklearn.cluster._kmeans import _labels_inertia",
- "from sklearn.cluster._kmeans import _mini_batch_step",
- "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_dense",
- "from sklearn.cluster._k_means_fast import _relocate_empty_clusters_sparse",
- "from sklearn.cluster._k_means_fast import _euclidean_dense_dense_wrapper",
- "from sklearn.cluster._k_means_fast import _euclidean_sparse_dense_wrapper",
- "from sklearn.cluster._k_means_fast import _inertia_dense",
- "from sklearn.cluster._k_means_fast import _inertia_sparse",
- "from sklearn.datasets import make_blobs",
- "from io import StringIO"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_kmeans_results",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_relocated_clusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_relocate_empty_clusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_elkan_results",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_convergence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_update_consistency",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_fitted_model",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_all_init",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_partial_fit_init",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_fortran_aligned_data",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_k_means_fit_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_verbose",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_verbose",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_warning_init_size",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_warning_n_init_precomputed_centers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_sensible_reassign",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_reassign",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_with_many_reassignments",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_init_size",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_copyx",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_score_max_iter",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_predict_dense_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_integer_input",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_fit_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_init",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_k_means_function",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_float_precision",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_centers_not_mutated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_init_fitted_centers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_warns_less_centers_than_unique_points",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_sort_centers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_weighted_vs_repeated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_unit_weights_vs_no_weights",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_scaled_weights",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_elkan_iter_attribute",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_empty_cluster_relocated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_result_of_kmeans_equal_in_diff_n_threads",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_precompute_distance_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_jobs_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_deprecated_attributes",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_warning_elkan_1_cluster",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_k_means_1_iteration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_euclidean_distance",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_inertia",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sample_weight_unchanged",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_wrong_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_wrong_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minibatch_kmeans_wrong_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_plusplus_wrong_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_plusplus_output",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_plusplus_norms",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_kmeans_plusplus_dataorder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_mean_shift",
- "imports": [
- "import numpy as np",
- "import warnings",
- "import pytest",
- "from scipy import sparse",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_raise_message",
- "from sklearn.utils._testing import assert_allclose",
- "from sklearn.cluster import MeanShift",
- "from sklearn.cluster import mean_shift",
- "from sklearn.cluster import estimate_bandwidth",
- "from sklearn.cluster import get_bin_seeds",
- "from sklearn.datasets import make_blobs",
- "from sklearn.metrics import v_measure_score"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_estimate_bandwidth",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_estimate_bandwidth_1sample",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mean_shift",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mean_shift_negative_bandwidth",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_estimate_bandwidth_with_sparse_matrix",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_parallel",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_meanshift_predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_meanshift_all_orphans",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_unfitted",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_cluster_intensity_tie",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_bin_seeds",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_max_iter",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mean_shift_zero_bandwidth",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_optics",
- "imports": [
- "import platform",
- "import sys",
- "import numpy as np",
- "import pytest",
- "from sklearn.datasets import make_blobs",
- "from sklearn.cluster import OPTICS",
- "from sklearn.cluster._optics import _extend_region",
- "from sklearn.cluster._optics import _extract_xi_labels",
- "from sklearn.metrics.cluster import contingency_matrix",
- "from sklearn.metrics.pairwise import pairwise_distances",
- "from sklearn.cluster import DBSCAN",
- "from sklearn.utils import shuffle",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_raise_message",
- "from sklearn.utils._testing import assert_allclose",
- "from sklearn.utils.fixes import sp_version",
- "from sklearn.utils.fixes import parse_version",
- "from sklearn.cluster.tests.common import generate_clustered_data",
- "from sklearn.utils import _IS_32BIT"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_extend_downward",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_extend_upward",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_the_extract_xi_labels",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_extract_xi",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_cluster_hierarchy_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_correct_number_of_clusters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_minimum_number_of_sample_check",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_bad_extract",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_bad_reachability",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_close_extract",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_dbscan_optics_parity",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_min_samples_edge_case",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_min_cluster_size",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_min_cluster_size_invalid",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_min_cluster_size_invalid2",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_processing_order",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_compare_to_ELKI",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_wrong_cluster_method",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_extract_dbscan",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_precomputed_dists",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests.test_spectral",
- "imports": [
- "import re",
- "import numpy as np",
- "from scipy import sparse",
- "import pytest",
- "import pickle",
- "from sklearn.utils import check_random_state",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_warns_message",
- "from sklearn.cluster import SpectralClustering",
- "from sklearn.cluster import spectral_clustering",
- "from sklearn.cluster._spectral import discretize",
- "from sklearn.feature_extraction import img_to_graph",
- "from sklearn.metrics import pairwise_distances",
- "from sklearn.metrics import adjusted_rand_score",
- "from sklearn.metrics.pairwise import kernel_metrics",
- "from sklearn.metrics.pairwise import rbf_kernel",
- "from sklearn.neighbors import NearestNeighbors",
- "from sklearn.datasets import make_blobs",
- "from pyamg import smoothed_aggregation_solver"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_spectral_clustering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_unknown_mode",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_unknown_assign_labels",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_clustering_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_precomputed_nearest_neighbors_filtering",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_affinities",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_discretize",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_spectral_clustering_with_arpack_amg_solvers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_components",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_verbose",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_pairwise_is_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cluster.tests",
- "imports": [],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.compose._column_transformer",
- "imports": [
- "from itertools import chain",
- "import numpy as np",
- "from scipy import sparse",
- "from joblib import Parallel",
- "from base import clone",
- "from base import TransformerMixin",
- "from utils._estimator_html_repr import _VisualBlock",
- "from pipeline import _fit_transform_one",
- "from pipeline import _transform_one",
- "from pipeline import _name_estimators",
- "from preprocessing import FunctionTransformer",
- "from utils import Bunch",
- "from utils import _safe_indexing",
- "from utils import _get_column_indices",
- "from utils import _determine_key_type",
- "from utils.metaestimators import _BaseComposition",
- "from utils.validation import check_array",
- "from utils.validation import check_is_fitted",
- "from utils.validation import _deprecate_positional_args",
- "from utils.fixes import delayed"
- ],
- "classes": [
- {
- "name": "ColumnTransformer",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "transformers",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "List of (name, transformer, columns) tuples specifying the transformer objects to be applied to subsets of the data. name : str Like in Pipeline and FeatureUnion, this allows the transformer and its parameters to be set using ``set_params`` and searched in grid search. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`."
- },
- {
- "name": "remainder",
- "type": "Literal['drop', 'passthrough']",
- "hasDefault": true,
- "default": "'drop'",
- "limitation": null,
- "ignored": false,
- "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. The estimator must support :term:`fit` and :term:`transform`. Note that using this feature requires that the DataFrame columns input at :term:`fit` and :term:`transform` have identical order."
- },
- {
- "name": "sparse_threshold",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "If the output of the different transformers contains sparse matrices, these will be stacked as a sparse matrix if the overall density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all dense data, the stacked result will be dense, and this keyword will be ignored."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- },
- {
- "name": "transformer_weights",
- "type": "Dict",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Multiplicative weights for features per transformer. The output of the transformer is multiplied by these weights. Keys are transformer names, values the weights."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_transformers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "get_params",
- "decorators": [],
- "parameters": [
- {
- "name": "deep",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, will return the parameters for this estimator and contained subobjects that are estimators."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get parameters for this estimator.\n\nReturns the parameters given in the constructor as well as the\nestimators contained within the `transformers` of the\n`ColumnTransformer`.\n\nParameters\n----------\ndeep : bool, default=True\n If True, will return the parameters for this estimator and\n contained subobjects that are estimators.\n\nReturns\n-------\nparams : dict\n Parameter names mapped to their values."
- },
- {
- "name": "set_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Set the parameters of this estimator.\n\nValid parameter keys can be listed with ``get_params()``. Note that you\ncan directly set the parameters of the estimators contained in\n`transformers` of `ColumnTransformer`.\n\nReturns\n-------\nself"
- },
- {
- "name": "_iter",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate (name, trans, column, weight) tuples.\n\nIf fitted=True, use the fitted transformers, else use the\nuser specified transformers updated with converted column names\nand potentially appended with transformer for remainder."
- },
- {
- "name": "_validate_transformers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_validate_column_callables",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Converts callable column specifications."
- },
- {
- "name": "_validate_remainder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Validates ``remainder`` and defines ``_remainder`` targeting\nthe remaining columns."
- },
- {
- "name": "named_transformers_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Access the fitted transformer by name.\n\nRead-only attribute to access any transformer by given name.\nKeys are transformer names and values are the fitted transformer\nobjects."
- },
- {
- "name": "get_feature_names",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get feature names from all transformers.\n\nReturns\n-------\nfeature_names : list of strings\n Names of the features produced by transform."
- },
- {
- "name": "_update_fitted_transformers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_validate_output",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Ensure that the output of each transformer is 2D. Otherwise\nhstack can raise an error or produce incorrect results."
- },
- {
- "name": "_log_message",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Private function to fit and/or transform on demand.\n\nReturn value (transformers and/or transformed X data) depends\non the passed function.\n``fitted=True`` ensures the fitted transformers are used."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data, of which specified subsets are used to fit the transformers."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets for supervised learning."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit all transformers using X.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,...), default=None\n Targets for supervised learning.\n\nReturns\n-------\nself : ColumnTransformer\n This estimator"
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Input data, of which specified subsets are used to fit the transformers."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets for supervised learning."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit all transformers, transform the data and concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n Input data, of which specified subsets are used to fit the\n transformers.\n\ny : array-like of shape (n_samples,), default=None\n Targets for supervised learning.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data to be transformed by subset."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform X separately by each transformer, concatenate results.\n\nParameters\n----------\nX : {array-like, dataframe} of shape (n_samples, n_features)\n The data to be transformed by subset.\n\nReturns\n-------\nX_t : {array-like, sparse matrix} of shape (n_samples, sum_n_components)\n hstack of results of transformers. sum_n_components is the\n sum of n_components (output dimension) over transformers. If\n any result is a sparse matrix, everything will be converted to\n sparse matrices."
- },
- {
- "name": "_hstack",
- "decorators": [],
- "parameters": [
- {
- "name": "Xs",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": ""
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Stacks Xs horizontally.\n\nThis allows subclasses to control the stacking behavior, while reusing\neverything else from ColumnTransformer.\n\nParameters\n----------\nXs : list of {array-like, sparse matrix, dataframe}"
- },
- {
- "name": "_sk_visual_block_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\ntransformers : list of tuples\n List of (name, transformer, columns) tuples specifying the\n transformer objects to be applied to subsets of the data.\n\n name : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. 
(default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. The\n estimator must support :term:`fit` and :term:`transform`.\n Note that using this feature requires that the DataFrame columns\n input at :term:`fit` and :term:`transform` have identical order.\n\nsparse_threshold : float, default=0.3\n If the output of the different transformers contains sparse matrices,\n these will be stacked as a sparse matrix if the overall density is\n lower than this value. Use ``sparse_threshold=0`` to always return\n dense. When the transformed output consists of all dense data, the\n stacked result will be dense, and this keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ntransformer_weights : dict, default=None\n Multiplicative weights for features per transformer. The output of the\n transformer is multiplied by these weights. Keys are transformer names,\n values the weights.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nAttributes\n----------\ntransformers_ : list\n The collection of fitted transformers as tuples of\n (name, fitted_transformer, column). `fitted_transformer` can be an\n estimator, 'drop', or 'passthrough'. In case there were no columns\n selected, this will be the unfitted transformer.\n If there are remaining columns, the final element is a tuple of the\n form:\n ('remainder', transformer, remaining_columns) corresponding to the\n ``remainder`` parameter. 
If there are remaining columns, then\n ``len(transformers_)==len(transformers)+1``, otherwise\n ``len(transformers_)==len(transformers)``.\n\nnamed_transformers_ : :class:`~sklearn.utils.Bunch`\n Read-only attribute to access any transformer by given name.\n Keys are transformer names and values are the fitted transformer\n objects.\n\nsparse_output_ : bool\n Boolean flag indicating whether the output of ``transform`` is a\n sparse matrix or a dense numpy array, which depends on the output\n of the individual transformers and the `sparse_threshold` keyword.\n\nNotes\n-----\nThe order of the columns in the transformed feature matrix follows the\norder of how the columns are specified in the `transformers` list.\nColumns of the original feature matrix that are not specified are\ndropped from the resulting transformed feature matrix, unless specified\nin the `passthrough` keyword. Those columns specified with `passthrough`\nare added at the right to the output of the transformers.\n\nSee Also\n--------\nmake_column_transformer : Convenience function for\n combining the outputs of multiple transformer objects applied to\n column subsets of the original feature space.\nmake_column_selector : Convenience function for selecting\n columns based on datatype or the columns name with a regex pattern.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.compose import ColumnTransformer\n>>> from sklearn.preprocessing import Normalizer\n>>> ct = ColumnTransformer(\n... [(\"norm1\", Normalizer(norm='l1'), [0, 1]),\n... (\"norm2\", Normalizer(norm='l1'), slice(2, 4))])\n>>> X = np.array([[0., 1., 2., 2.],\n... [1., 1., 0., 1.]])\n>>> # Normalizer scales each row of X to unit norm. A separate scaling\n>>> # is applied for the two first and two last elements of each\n>>> # row independently.\n>>> ct.fit_transform(X)\narray([[0. , 1. , 0.5, 0.5],\n [0.5, 0.5, 0. , 1. ]])"
- },
- {
- "name": "make_column_selector",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "pattern",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Name of columns containing this regex pattern will be included. If None, column selection will not be selected based on pattern."
- },
- {
- "name": "dtype_include",
- "type": "List",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A selection of dtypes to include. For more details, see :meth:`pandas.DataFrame.select_dtypes`."
- },
- {
- "name": "dtype_exclude",
- "type": "List",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A selection of dtypes to exclude. For more details, see :meth:`pandas.DataFrame.select_dtypes`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "__call__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Create a callable to select columns to be used with\n:class:`ColumnTransformer`.\n\n:func:`make_column_selector` can select columns based on datatype or the\ncolumns name with a regex. When using multiple selection criteria, **all**\ncriteria must match for a column to be selected.\n\nParameters\n----------\npattern : str, default=None\n Name of columns containing this regex pattern will be included. If\n None, column selection will not be selected based on pattern.\n\ndtype_include : column dtype or list of column dtypes, default=None\n A selection of dtypes to include. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\ndtype_exclude : column dtype or list of column dtypes, default=None\n A selection of dtypes to exclude. For more details, see\n :meth:`pandas.DataFrame.select_dtypes`.\n\nReturns\n-------\nselector : callable\n Callable for column selection to be used by a\n :class:`ColumnTransformer`.\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> from sklearn.compose import make_column_selector\n>>> import pandas as pd # doctest: +SKIP\n>>> X = pd.DataFrame({'city': ['London', 'London', 'Paris', 'Sallisaw'],\n... 'rating': [5, 3, 4, 5]}) # doctest: +SKIP\n>>> ct = make_column_transformer(\n... (StandardScaler(),\n... make_column_selector(dtype_include=np.number)), # rating\n... (OneHotEncoder(),\n... make_column_selector(dtype_include=object))) # city\n>>> ct.fit_transform(X) # doctest: +SKIP\narray([[ 0.90453403, 1. , 0. , 0. ],\n [-1.50755672, 1. , 0. , 0. ],\n [-0.30151134, 0. , 1. , 0. ],\n [ 0.90453403, 0. , 0. , 1. ]])"
- }
- ],
- "functions": [
- {
- "name": "_check_X",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Use check_array only on lists and other non-array-likes / sparse"
- },
- {
- "name": "_is_empty_column_selection",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return True if the column selection is empty (empty list or all-False\nboolean array)."
- },
- {
- "name": "_get_transformer_list",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Construct (name, trans, column) tuples from list"
- },
- {
- "name": "make_column_transformer",
- "decorators": [],
- "parameters": [
- {
- "name": "*transformers",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Tuples of the form (transformer, columns) specifying the transformer objects to be applied to subsets of the data. transformer : {'drop', 'passthrough'} or estimator Estimator must support :term:`fit` and :term:`transform`. Special-cased strings 'drop' and 'passthrough' are accepted as well, to indicate to drop the columns or to pass them through untransformed, respectively. columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable Indexes the data on its second axis. Integers are interpreted as positional columns, while strings can reference DataFrame columns by name. A scalar string or int should be used where ``transformer`` expects X to be a 1d array-like (vector), otherwise a 2d array will be passed to the transformer. A callable is passed the input data `X` and can return any of the above. To select multiple columns by name or dtype, you can use :obj:`make_column_selector`."
- },
- {
- "name": "remainder",
- "type": "Literal['drop', 'passthrough']",
- "hasDefault": true,
- "default": "'drop'",
- "limitation": null,
- "ignored": false,
- "docstring": "By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. The estimator must support :term:`fit` and :term:`transform`."
- },
- {
- "name": "sparse_threshold",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "If the transformed output consists of a mix of sparse and dense data, it will be stacked as a sparse matrix if the density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all sparse or all dense data, the stacked result will be sparse or dense, respectively, and this keyword will be ignored."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the time elapsed while fitting each transformer will be printed as it is completed."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Construct a ColumnTransformer from the given transformers.\n\nThis is a shorthand for the ColumnTransformer constructor; it does not\nrequire, and does not permit, naming the transformers. Instead, they will\nbe given names automatically based on their types. It also does not allow\nweighting with ``transformer_weights``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\n*transformers : tuples\n Tuples of the form (transformer, columns) specifying the\n transformer objects to be applied to subsets of the data.\n\n transformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\n columns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`.\n\nremainder : {'drop', 'passthrough'} or estimator, default='drop'\n By default, only the specified columns in `transformers` are\n transformed and combined in the output, and the non-specified\n columns are dropped. (default of ``'drop'``).\n By specifying ``remainder='passthrough'``, all remaining columns that\n were not specified in `transformers` will be automatically passed\n through. This subset of columns is concatenated with the output of\n the transformers.\n By setting ``remainder`` to be an estimator, the remaining\n non-specified columns will use the ``remainder`` estimator. 
The\n estimator must support :term:`fit` and :term:`transform`.\n\nsparse_threshold : float, default=0.3\n If the transformed output consists of a mix of sparse and dense data,\n it will be stacked as a sparse matrix if the density is lower than this\n value. Use ``sparse_threshold=0`` to always return dense.\n When the transformed output consists of all sparse or all dense data,\n the stacked result will be sparse or dense, respectively, and this\n keyword will be ignored.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n If True, the time elapsed while fitting each transformer will be\n printed as it is completed.\n\nReturns\n-------\nct : ColumnTransformer\n\nSee Also\n--------\nColumnTransformer : Class that allows combining the\n outputs of multiple transformer objects used on column subsets\n of the data into a single feature space.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler, OneHotEncoder\n>>> from sklearn.compose import make_column_transformer\n>>> make_column_transformer(\n... (StandardScaler(), ['numerical_column']),\n... (OneHotEncoder(), ['categorical_column']))\nColumnTransformer(transformers=[('standardscaler', StandardScaler(...),\n ['numerical_column']),\n ('onehotencoder', OneHotEncoder(...),\n ['categorical_column'])])"
- }
- ]
- },
- {
- "name": "sklearn.compose._target",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from base import BaseEstimator",
- "from base import RegressorMixin",
- "from base import clone",
- "from utils.validation import check_is_fitted",
- "from utils import check_array",
- "from utils import _safe_indexing",
- "from preprocessing import FunctionTransformer",
- "from utils.validation import _deprecate_positional_args",
- "from exceptions import NotFittedError",
- "from linear_model import LinearRegression"
- ],
- "classes": [
- {
- "name": "TransformedTargetRegressor",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "regressor",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Regressor object such as derived from ``RegressorMixin``. This regressor will automatically be cloned each time prior to fitting. If regressor is ``None``, ``LinearRegression()`` is created and used."
- },
- {
- "name": "transformer",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimator object such as derived from ``TransformerMixin``. Cannot be set at the same time as ``func`` and ``inverse_func``. If ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``, the transformer will be an identity transformer. Note that the transformer will be cloned during fitting. Also, the transformer is restricting ``y`` to be a numpy array."
- },
- {
- "name": "func",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Function to apply to ``y`` before passing to ``fit``. Cannot be set at the same time as ``transformer``. The function needs to return a 2-dimensional array. If ``func`` is ``None``, the function used will be the identity function."
- },
- {
- "name": "inverse_func",
- "type": null,
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Function to apply to the prediction of the regressor. Cannot be set at the same time as ``transformer`` as well. The function needs to return a 2-dimensional array. The inverse function is used to return predictions to the same space of the original training labels."
- },
- {
- "name": "check_inverse",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to check that ``transform`` followed by ``inverse_transform`` or ``func`` followed by ``inverse_func`` leads to the original targets."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_fit_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Check transformer and fit transformer.\n\nCreate the default transformer, fit it and make additional inverse\ncheck on a subset (optional)."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vector, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target values."
- },
- {
- "name": "**fit_params",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Parameters passed to the ``fit`` method of the underlying regressor."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the model according to the given training data.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training vector, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values.\n\n**fit_params : dict\n Parameters passed to the ``fit`` method of the underlying\n regressor.\n\n\nReturns\n-------\nself : object"
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict using the base regressor, applying inverse.\n\nThe regressor is used to predict and the ``inverse_func`` or\n``inverse_transform`` is applied before returning the prediction.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Samples.\n\nReturns\n-------\ny_hat : ndarray of shape (n_samples,)\n Predicted values."
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "n_features_in_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target ``y`` in\nregression problems. This transformation can be given as a Transformer\nsuch as the QuantileTransformer or as a function and its inverse such as\n``log`` and ``exp``.\n\nThe computation during ``fit`` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during ``predict`` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n Regressor object such as derived from ``RegressorMixin``. This\n regressor will automatically be cloned each time prior to fitting.\n If regressor is ``None``, ``LinearRegression()`` is created and used.\n\ntransformer : object, default=None\n Estimator object such as derived from ``TransformerMixin``. Cannot be\n set at the same time as ``func`` and ``inverse_func``. If\n ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,\n the transformer will be an identity transformer. Note that the\n transformer will be cloned during fitting. Also, the transformer is\n restricting ``y`` to be a numpy array.\n\nfunc : function, default=None\n Function to apply to ``y`` before passing to ``fit``. Cannot be set at\n the same time as ``transformer``. The function needs to return a\n 2-dimensional array. If ``func`` is ``None``, the function used will be\n the identity function.\n\ninverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as ``transformer`` as well. The function needs to return\n a 2-dimensional array. 
The inverse function is used to return\n predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n Whether to check that ``transform`` followed by ``inverse_transform``\n or ``func`` followed by ``inverse_func`` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n Fitted regressor.\n\ntransformer_ : object\n Transformer used in ``fit`` and ``predict``.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n... func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])\n\nNotes\n-----\nInternally, the target ``y`` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to a have the same number of dimensions as ``y``.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n`."
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.compose",
- "imports": [
- "from _column_transformer import ColumnTransformer",
- "from _column_transformer import make_column_transformer",
- "from _column_transformer import make_column_selector",
- "from _target import TransformedTargetRegressor"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.compose.tests.test_column_transformer",
- "imports": [
- "import re",
- "import pickle",
- "import warnings",
- "import numpy as np",
- "from scipy import sparse",
- "import pytest",
- "from numpy.testing import assert_allclose",
- "from sklearn.utils._testing import assert_raise_message",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_allclose_dense_sparse",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.base import BaseEstimator",
- "from sklearn.compose import ColumnTransformer",
- "from sklearn.compose import make_column_transformer",
- "from sklearn.compose import make_column_selector",
- "from sklearn.exceptions import NotFittedError",
- "from sklearn.preprocessing import FunctionTransformer",
- "from sklearn.preprocessing import StandardScaler",
- "from sklearn.preprocessing import Normalizer",
- "from sklearn.preprocessing import OneHotEncoder",
- "from sklearn.feature_extraction import DictVectorizer"
- ],
- "classes": [
- {
- "name": "Trans",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "DoubleTrans",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "SparseMatrixTrans",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "TransNo2D",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "TransRaise",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- }
- ],
- "functions": [
- {
- "name": "test_column_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_dataframe",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_empty_columns",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_sparse_array",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_list",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_sparse_stacking",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_mixed_cols_sparse",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_sparse_threshold",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_error_msg_1D",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_2D_transformer_output",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_2D_transformer_output_pandas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_invalid_columns",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_invalid_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_transformer_pandas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_transformer_kwargs",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_transformer_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_get_set_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_named_estimators",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_cloning",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_get_feature_names",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_get_feature_names_dataframe",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_special_strings",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_remainder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_remainder_numpy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_remainder_pandas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_no_remaining_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_drops_all_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_sparse_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_drop_all_sparse_remainder_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_get_set_params_with_remainder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_no_estimators",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_verbose",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_no_estimators_set_params",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_callable_specifier",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_callable_specifier_dataframe",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_negative_column_indexes",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_reordered_column_names_remainder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Regression test for issue #14223: 'Named col indexing fails with\nColumnTransformer remainder on changing DataFrame column ordering'\n\nShould raise error on changed order combined with remainder.\nShould allow for added columns in `transform` input DataFrame\nas long as all preceding columns match."
- },
- {
- "name": "test_feature_name_validation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Tests if the proper warning/error is raised if the columns do not match\nduring fit and transform."
- },
- {
- "name": "test_column_transformer_mask_indexing",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_features_in",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_selector_with_select_dtypes",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_column_transformer_with_make_column_selector",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_selector_error",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_make_column_selector_pickle",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_feature_names_empty_columns",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sk_visual_block_remainder",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sk_visual_block_remainder_drop",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sk_visual_block_remainder_fitted_pandas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sk_visual_block_remainder_fitted_numpy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.compose.tests.test_target",
- "imports": [
- "import numpy as np",
- "import pytest",
- "from sklearn.base import clone",
- "from sklearn.base import BaseEstimator",
- "from sklearn.base import TransformerMixin",
- "from sklearn.dummy import DummyRegressor",
- "from sklearn.utils._testing import assert_allclose",
- "from sklearn.utils._testing import assert_warns_message",
- "from sklearn.utils._testing import assert_no_warnings",
- "from sklearn.preprocessing import FunctionTransformer",
- "from sklearn.preprocessing import StandardScaler",
- "from sklearn.pipeline import Pipeline",
- "from sklearn.linear_model import LinearRegression",
- "from sklearn.linear_model import OrthogonalMatchingPursuit",
- "from sklearn import datasets",
- "from sklearn.compose import TransformedTargetRegressor"
- ],
- "classes": [
- {
- "name": "DummyCheckerArrayTransformer",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "inverse_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "DummyCheckerListRegressor",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- },
- {
- "name": "DummyTransformer",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "inverse_transform",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Dummy transformer which count how many time fit was called."
- },
- {
- "name": "DummyRegressorWithExtraFitParams",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": null
- }
- ],
- "functions": [
- {
- "name": "test_transform_target_regressor_error",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_invertible",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_standard_scaled",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_check_shifted_by_one",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_functions",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_functions_multioutput",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_1d_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_2d_transformer",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_2d_transformer_multioutput",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_multi_to_single",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_ensure_y_array",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_count_fit",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_pass_fit_parameters",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_transform_target_regressor_route_pipeline",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.compose.tests",
- "imports": [],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.covariance._elliptic_envelope",
- "imports": [
- "import numpy as np",
- "from None import MinCovDet",
- "from utils.validation import check_is_fitted",
- "from utils.validation import check_array",
- "from utils.validation import _deprecate_positional_args",
- "from metrics import accuracy_score",
- "from base import OutlierMixin"
- ],
- "classes": [
- {
- "name": "EllipticEnvelope",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify if the estimated precision is stored."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the support of robust location and covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment."
- },
- {
- "name": "support_fraction",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The proportion of points to be included in the support of the raw MCD estimate. If None, the minimum value of support_fraction will be used within the algorithm: `[n_sample + n_features + 1] / 2`. Range is (0, 1)."
- },
- {
- "name": "contamination",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Range is (0, 0.5)."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit the EllipticEnvelope model.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data.\n\ny : Ignored\n Not used, present for API consistency by convention."
- },
- {
- "name": "decision_function",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute the decision function of the given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\ndecision : ndarray of shape (n_samples,)\n Decision function of the samples.\n It is equal to the shifted Mahalanobis distances.\n The threshold for being an outlier is 0, which ensures a\n compatibility with other outlier detection algorithms."
- },
- {
- "name": "score_samples",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Compute the negative Mahalanobis distances.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nnegative_mahal_distances : array-like of shape (n_samples,)\n Opposite of the Mahalanobis distances."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict the labels (1 inlier, -1 outlier) of X according to the\nfitted model.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix.\n\nReturns\n-------\nis_inlier : ndarray of shape (n_samples,)\n Returns -1 for anomalies/outliers and +1 for inliers."
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test samples."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "True labels for X."
- },
- {
- "name": "sample_weight",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Sample weights."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns the mean accuracy on the given test data and labels.\n\nIn multi-label classification, this is the subset accuracy\nwhich is a harsh metric since you require for each sample that\neach label set be correctly predicted.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Test samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_outputs)\n True labels for X.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nscore : float\n Mean accuracy of self.predict(X) w.r.t. y."
- }
- ],
- "docstring": "An object for detecting outliers in a Gaussian distributed dataset.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of robust location and covariance estimates\n is computed, and a covariance estimate is recomputed from it,\n without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. If None, the minimum value of support_fraction will\n be used within the algorithm: `[n_sample + n_features + 1] / 2`.\n Range is (0, 1).\n\ncontamination : float, default=0.1\n The amount of contamination of the data set, i.e. the proportion\n of outliers in the data set. Range is (0, 0.5).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling\n the data. Pass an int for reproducible results across multiple function\n calls. 
See :term: `Glossary `.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute the\n robust estimates of location and shape.\n\noffset_ : float\n Offset used to define the decision function from the raw scores.\n We have the relation: ``decision_function = score_samples - offset_``.\n The offset depends on the contamination parameter and is defined in\n such a way we obtain the expected number of outliers (samples with\n decision function < 0) in training.\n\n .. versionadded:: 0.20\n\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EllipticEnvelope\n>>> true_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> X = np.random.RandomState(0).multivariate_normal(mean=[0, 0],\n... cov=true_cov,\n... size=500)\n>>> cov = EllipticEnvelope(random_state=0).fit(X)\n>>> # predict returns 1 for an inlier and -1 for an outlier\n>>> cov.predict([[0, 0],\n... [3, 3]])\narray([ 1, -1])\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... 
, 0.0427...])\n\nSee Also\n--------\nEmpiricalCovariance, MinCovDet\n\nNotes\n-----\nOutlier detection from covariance estimation may break or not\nperform well in high-dimensional settings. In particular, one will\nalways take care to work with ``n_samples > n_features ** 2``.\n\nReferences\n----------\n.. [1] Rousseeuw, P.J., Van Driessen, K. \"A fast algorithm for the\n minimum covariance determinant estimator\" Technometrics 41(3), 212\n (1999)"
- }
- ],
- "functions": []
- },
- {
- "name": "sklearn.covariance._empirical_covariance",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from scipy import linalg",
- "from base import BaseEstimator",
- "from utils import check_array",
- "from utils.extmath import fast_logdet",
- "from metrics.pairwise import pairwise_distances",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "EmpiricalCovariance",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specifies if the estimated precision is stored."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data are centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_set_covariance",
- "decorators": [],
- "parameters": [
- {
- "name": "covariance",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Estimated covariance matrix to be stored, and from which precision is computed."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Saves the covariance and precision estimates\n\nStorage is done accordingly to `self.store_precision`.\nPrecision stored only if invertible.\n\nParameters\n----------\ncovariance : array-like of shape (n_features, n_features)\n Estimated covariance matrix to be stored, and from which precision\n is computed."
- },
- {
- "name": "get_precision",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Getter for the precision matrix.\n\nReturns\n-------\nprecision_ : array-like of shape (n_features, n_features)\n The precision matrix associated to the current covariance object."
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fits the Maximum Likelihood Estimator covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples and\n n_features is the number of features.\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "score",
- "decorators": [],
- "parameters": [
- {
- "name": "X_test",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Test data of which we compute the likelihood, where n_samples is the number of samples and n_features is the number of features. X_test is assumed to be drawn from the same distribution than the data used in fit (including centering)."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the log-likelihood of a Gaussian data set with\n`self.covariance_` as an estimator of its covariance matrix.\n\nParameters\n----------\nX_test : array-like of shape (n_samples, n_features)\n Test data of which we compute the likelihood, where n_samples is\n the number of samples and n_features is the number of features.\n X_test is assumed to be drawn from the same distribution than\n the data used in fit (including centering).\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nres : float\n The likelihood of the data set with `self.covariance_` as an\n estimator of its covariance matrix."
- },
- {
- "name": "error_norm",
- "decorators": [],
- "parameters": [
- {
- "name": "comp_cov",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The covariance to compare with."
- },
- {
- "name": "norm",
- "type": "Literal[\"frobenius\", \"spectral\"]",
- "hasDefault": true,
- "default": "\"frobenius\"",
- "limitation": null,
- "ignored": false,
- "docstring": "The type of norm used to compute the error. Available error types: - 'frobenius' (default): sqrt(tr(A^t.A)) - 'spectral': sqrt(max(eigenvalues(A^t.A)) where A is the error ``(comp_cov - self.covariance_)``."
- },
- {
- "name": "scaling",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True (default), the squared error norm is divided by n_features. If False, the squared error norm is not rescaled."
- },
- {
- "name": "squared",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to compute the squared error norm or the error norm. If True (default), the squared error norm is returned. If False, the error norm is returned."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the Mean Squared Error between two covariance estimators.\n(In the sense of the Frobenius norm).\n\nParameters\n----------\ncomp_cov : array-like of shape (n_features, n_features)\n The covariance to compare with.\n\nnorm : {\"frobenius\", \"spectral\"}, default=\"frobenius\"\n The type of norm used to compute the error. Available error types:\n - 'frobenius' (default): sqrt(tr(A^t.A))\n - 'spectral': sqrt(max(eigenvalues(A^t.A))\n where A is the error ``(comp_cov - self.covariance_)``.\n\nscaling : bool, default=True\n If True (default), the squared error norm is divided by n_features.\n If False, the squared error norm is not rescaled.\n\nsquared : bool, default=True\n Whether to compute the squared error norm or the error norm.\n If True (default), the squared error norm is returned.\n If False, the error norm is returned.\n\nReturns\n-------\nresult : float\n The Mean Squared Error (in the sense of the Frobenius norm) between\n `self` and `comp_cov` covariance estimators."
- },
- {
- "name": "mahalanobis",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The observations, the Mahalanobis distances of the which we compute. Observations are assumed to be drawn from the same distribution than the data used in fit."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the squared Mahalanobis distances of given observations.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The observations, the Mahalanobis distances of the which we\n compute. Observations are assumed to be drawn from the same\n distribution than the data used in fit.\n\nReturns\n-------\ndist : ndarray of shape (n_samples,)\n Squared Mahalanobis distances of the observations."
- }
- ],
- "docstring": "Maximum likelihood covariance estimator\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specifies if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo-inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import EmpiricalCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = EmpiricalCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7569..., 0.2818...],\n [0.2818..., 0.3928...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])"
- }
- ],
- "functions": [
- {
- "name": "log_likelihood",
- "decorators": [],
- "parameters": [
- {
- "name": "emp_cov",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum Likelihood Estimator of covariance."
- },
- {
- "name": "precision",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The precision matrix of the covariance model to be tested."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the sample mean of the log_likelihood under a covariance model\n\ncomputes the empirical expected log-likelihood (accounting for the\nnormalization terms and scaling), allowing for universal comparison (beyond\nthis software package)\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Maximum Likelihood Estimator of covariance.\n\nprecision : ndarray of shape (n_features, n_features)\n The precision matrix of the covariance model to be tested.\n\nReturns\n-------\nlog_likelihood_ : float\n Sample mean of the log-likelihood."
- },
- {
- "name": "empirical_covariance",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate"
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Computes the Maximum likelihood covariance estimator\n\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n Empirical covariance (Maximum Likelihood Estimator).\n\nExamples\n--------\n>>> from sklearn.covariance import empirical_covariance\n>>> X = [[1,1,1],[1,1,1],[1,1,1],\n... [0,0,0],[0,0,0],[0,0,0]]\n>>> empirical_covariance(X)\narray([[0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25],\n [0.25, 0.25, 0.25]])"
- }
- ]
- },
- {
- "name": "sklearn.covariance._graph_lasso",
- "imports": [
- "from collections.abc import Sequence",
- "import warnings",
- "import operator",
- "import sys",
- "import time",
- "import numpy as np",
- "from scipy import linalg",
- "from joblib import Parallel",
- "from None import empirical_covariance",
- "from None import EmpiricalCovariance",
- "from None import log_likelihood",
- "from exceptions import ConvergenceWarning",
- "from utils.validation import check_random_state",
- "from utils.validation import _deprecate_positional_args",
- "from utils.fixes import delayed",
- "from linear_model import _cd_fast as cd_fast",
- "from linear_model import lars_path_gram",
- "from model_selection import check_cv",
- "from model_selection import cross_val_score",
- "from utils.deprecation import deprecated"
- ],
- "classes": [
- {
- "name": "GraphicalLasso",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]."
- },
- {
- "name": "mode",
- "type": "Literal['cd', 'lars']",
- "hasDefault": true,
- "default": "'cd'",
- "limitation": null,
- "ignored": false,
- "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]."
- },
- {
- "name": "enet_tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum number of iterations."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If verbose is True, the objective function and dual gap are plotted at each iteration."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate"
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fits the GraphicalLasso model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- }
- ],
- "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\nn_iter_ : int\n Number of iterations run.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... 
[0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLassoCV"
- },
- {
- "name": "GraphicalLassoCV",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "alphas",
- "type": "Union[ArrayLike, int]",
- "hasDefault": true,
- "default": "4",
- "limitation": null,
- "ignored": false,
- "docstring": "If an integer is given, it fixes the number of points on the grids of alpha to be used. If a list is given, it gives the grid to be used. See the notes in the class docstring for more details. Range is (0, inf] when floats given."
- },
- {
- "name": "n_refinements",
- "type": "int",
- "hasDefault": true,
- "default": "4",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of times the grid is refined. Not used if explicit values of alphas are passed. Range is [1, inf)."
- },
- {
- "name": "cv",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs :class:`KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.20 ``cv`` default value if None changed from 3-fold to 5-fold."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]."
- },
- {
- "name": "enet_tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations."
- },
- {
- "name": "mode",
- "type": "Literal['cd', 'lars']",
- "hasDefault": true,
- "default": "'cd'",
- "limitation": null,
- "ignored": false,
- "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where number of features is greater than number of samples. Elsewhere prefer cd which is more numerically stable."
- },
- {
- "name": "n_jobs",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None"
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If verbose is True, the objective function and duality gap are printed at each iteration."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data are not centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data are centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate"
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fits the GraphicalLasso covariance model to X.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "grid_scores_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "cv_alphas_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. 
Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\nn_jobs : int, default=None\n number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n Penalization parameter selected.\n\ncv_alphas_ : list of shape (n_alphas,), dtype=float\n All penalization parameters explored.\n\n .. deprecated:: 0.24\n The `cv_alphas_` attribute is deprecated in version 0.24 in favor\n of `cv_results_['alphas']` and will be removed in version\n 1.1 (renaming of 0.26).\n\ngrid_scores_ : ndarray of shape (n_alphas, n_folds)\n Log-likelihood score on left-out data across folds.\n\n .. 
deprecated:: 0.24\n The `grid_scores_` attribute is deprecated in version 0.24 in favor\n of `cv_results_` and will be removed in version\n 1.1 (renaming of 0.26).\n\ncv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 0.24\n\nn_iter_ : int\n Number of iterations run for the optimal alpha.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])\n\nSee Also\n--------\ngraphical_lasso, GraphicalLasso\n\nNotes\n-----\nThe search for the optimal penalization parameter (alpha) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of alpha then come out as missing values, but the optimum may\nbe close to these missing values."
- }
- ],
- "functions": [
- {
- "name": "_objective",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Evaluation of the graphical-lasso objective function\n\nthe objective function is made of a shifted scaled version of the\nnormalized log-likelihood (i.e. its empirical mean over the samples) and a\npenalisation term to promote sparsity"
- },
- {
- "name": "_dual_gap",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Expression of the dual gap convergence criterion\n\nThe specific definition is given in Duchi \"Projected Subgradient Methods\nfor Learning Sparse Gaussians\"."
- },
- {
- "name": "alpha_max",
- "decorators": [],
- "parameters": [
- {
- "name": "emp_cov",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The sample covariance matrix."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Find the maximum alpha for which there are some non-zeros off-diagonal.\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n The sample covariance matrix.\n\nNotes\n-----\nThis results from the bound for the all the Lasso that are solved\nin GraphicalLasso: each time, the row of cov corresponds to Xy. As the\nbound for alpha is given by `max(abs(Xy))`, the result follows."
- },
- {
- "name": "graphical_lasso",
- "decorators": [],
- "parameters": [
- {
- "name": "emp_cov",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Empirical covariance from which to compute the covariance estimate."
- },
- {
- "name": "alpha",
- "type": "float",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]."
- },
- {
- "name": "cov_init",
- "type": "Array",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The initial guess for the covariance. If None, then the empirical covariance is used."
- },
- {
- "name": "mode",
- "type": "Literal['cd', 'lars']",
- "hasDefault": true,
- "default": "'cd'",
- "limitation": null,
- "ignored": false,
- "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]."
- },
- {
- "name": "enet_tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum number of iterations."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If verbose is True, the objective function and dual gap are printed at each iteration."
- },
- {
- "name": "return_costs",
- "type": "bool",
- "hasDefault": true,
- "default": "Flase",
- "limitation": null,
- "ignored": false,
- "docstring": "If return_costs is True, the objective function and dual gap at each iteration are returned."
- },
- {
- "name": "eps",
- "type": "float",
- "hasDefault": true,
- "default": "eps",
- "limitation": null,
- "ignored": false,
- "docstring": "The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Default is `np.finfo(np.float64).eps`."
- },
- {
- "name": "return_n_iter",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to return the number of iterations."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "l1-penalized covariance estimator\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\nreturn_costs : bool, default=Flase\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\neps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. 
Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\nn_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso, GraphicalLassoCV\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized."
- },
- {
- "name": "graphical_lasso_path",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "NDArray",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate."
- },
- {
- "name": "alphas",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The list of regularization parameters, decreasing order."
- },
- {
- "name": "cov_init",
- "type": "Array",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The initial guess for the covariance."
- },
- {
- "name": "X_test",
- "type": "Array",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Optional test matrix to measure generalisation error."
- },
- {
- "name": "mode",
- "type": "Literal['cd', 'lars']",
- "hasDefault": true,
- "default": "'cd'",
- "limitation": null,
- "ignored": false,
- "docstring": "The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. The tolerance must be a positive number."
- },
- {
- "name": "enet_tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-4",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. The tolerance must be a positive number."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum number of iterations. This parameter should be a strictly positive integer."
- },
- {
- "name": "verbose",
- "type": "Union[bool, int]",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "The higher the verbosity flag, the more information is printed during the fitting."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "l1-penalized covariance estimator along a path of decreasing alphas\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nalphas : array-like of shape (n_alphas,)\n The list of regularization parameters, decreasing order.\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance.\n\nX_test : array of shape (n_test_samples, n_features), default=None\n Optional test matrix to measure generalisation error.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. The tolerance must be a positive\n number.\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. The tolerance must be a positive number.\n\nmax_iter : int, default=100\n The maximum number of iterations. This parameter should be a strictly\n positive integer.\n\nverbose : int or bool, default=False\n The higher the verbosity flag, the more information is printed\n during the fitting.\n\nReturns\n-------\ncovariances_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated covariance matrices.\n\nprecisions_ : list of shape (n_alphas,) of ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrices.\n\nscores_ : list of shape (n_alphas,), dtype=float\n The generalisation error (log-likelihood) on the test data.\n Returned only if test data is passed."
- }
- ]
- },
- {
- "name": "sklearn.covariance._robust_covariance",
- "imports": [
- "import warnings",
- "import numbers",
- "import numpy as np",
- "from scipy import linalg",
- "from scipy.stats import chi2",
- "from None import empirical_covariance",
- "from None import EmpiricalCovariance",
- "from utils.extmath import fast_logdet",
- "from utils import check_random_state",
- "from utils import check_array",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "MinCovDet",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify if the estimated precision is stored."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the support of the robust location and the covariance estimates is computed, and a covariance estimate is recomputed from it, without centering the data. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, the robust location and covariance are directly computed with the FastMCD algorithm without additional treatment."
- },
- {
- "name": "support_fraction",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The proportion of points to be included in the support of the raw MCD estimate. Default is None, which implies that the minimum value of support_fraction will be used within the algorithm: `(n_sample + n_features + 1) / 2`. The parameter must be in the range (0, 1)."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features."
- },
- {
- "name": "y: Ignored",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fits a Minimum Covariance Determinant with the FastMCD algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- },
- {
- "name": "correct_covariance",
- "decorators": [],
- "parameters": [
- {
- "name": "data",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply a correction to raw Minimum Covariance Determinant estimates.\n\nCorrection using the empirical correction factor suggested\nby Rousseeuw and Van Driessen in [RVD]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\ncovariance_corrected : ndarray of shape (n_features, n_features)\n Corrected robust covariance estimate.\n\nReferences\n----------\n\n.. [RVD] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS"
- },
- {
- "name": "reweight_covariance",
- "decorators": [],
- "parameters": [
- {
- "name": "data",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix, with p features and n samples. The data set must be the one which was used to compute the raw estimates."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Re-weight raw Minimum Covariance Determinant estimates.\n\nRe-weight observations using Rousseeuw's method (equivalent to\ndeleting outlying observations from the data set before\ncomputing location and covariance estimates) described\nin [RVDriessen]_.\n\nParameters\n----------\ndata : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n The data set must be the one which was used to compute\n the raw estimates.\n\nReturns\n-------\nlocation_reweighted : ndarray of shape (n_features,)\n Re-weighted robust location estimate.\n\ncovariance_reweighted : ndarray of shape (n_features, n_features)\n Re-weighted robust covariance estimate.\n\nsupport_reweighted : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the re-weighted robust location and covariance estimates.\n\nReferences\n----------\n\n.. [RVDriessen] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS"
- }
- ],
- "docstring": "Minimum Covariance Determinant (MCD): robust estimator of covariance.\n\nThe Minimum Covariance Determinant covariance estimator is to be applied\non Gaussian-distributed data, but could still be relevant on data\ndrawn from a unimodal, symmetric distribution. It is not meant to be used\nwith multi-modal data (the algorithm used to fit a MinCovDet object is\nlikely to fail in such a case).\nOne should consider projection pursuit methods to deal with multi-modal\ndatasets.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, the support of the robust location and the covariance\n estimates is computed, and a covariance estimate is recomputed from\n it, without centering the data.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, the robust location and covariance are directly computed\n with the FastMCD algorithm without additional treatment.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is None, which implies that the minimum\n value of support_fraction will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. 
The parameter must be in the range\n (0, 1).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nAttributes\n----------\nraw_location_ : ndarray of shape (n_features,)\n The raw robust estimated location before correction and re-weighting.\n\nraw_covariance_ : ndarray of shape (n_features, n_features)\n The raw robust estimated covariance before correction and re-weighting.\n\nraw_support_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the raw robust estimates of location and shape, before correction\n and re-weighting.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated robust location.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated robust covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nsupport_ : ndarray of shape (n_samples,)\n A mask of the observations that have been used to compute\n the robust estimates of location and shape.\n\ndist_ : ndarray of shape (n_samples,)\n Mahalanobis distances of the training set (on which :meth:`fit` is\n called) observations.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import MinCovDet\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = MinCovDet(random_state=0).fit(X)\n>>> cov.covariance_\narray([[0.7411..., 0.2535...],\n [0.2535..., 0.3053...]])\n>>> cov.location_\narray([0.0813... , 0.0427...])\n\nReferences\n----------\n\n.. [Rouseeuw1984] P. J. Rousseeuw. Least median of squares regression.\n J. Am Stat Ass, 79:871, 1984.\n.. 
[Rousseeuw] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS\n.. [ButlerDavies] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400"
- }
- ],
- "functions": [
- {
- "name": "c_step",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data set in which we look for the n_support observations whose scatter matrix has minimum determinant."
- },
- {
- "name": "n_support",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of observations to compute the robust estimates of location and covariance from. This parameter must be greater than `n_samples / 2`."
- },
- {
- "name": "remaining_iterations",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of iterations to perform. According to [Rouseeuw1999]_, two iterations are sufficient to get close to the minimum, and we never need more than 30 to reach convergence."
- },
- {
- "name": "initial_estimates",
- "type": "Tuple[]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Initial estimates of location and shape from which to run the c_step procedure: - initial_estimates[0]: an initial location estimate - initial_estimates[1]: an initial covariance estimate"
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Verbose mode."
- },
- {
- "name": "cov_computation_method",
- "type": "Callable",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The function which will be used to compute the covariance. Must return array of shape (n_features, n_features)."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "C_step procedure described in [Rouseeuw1984]_ aiming at computing MCD.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data set in which we look for the n_support observations whose\n scatter matrix has minimum determinant.\n\nn_support : int\n Number of observations to compute the robust estimates of location\n and covariance from. This parameter must be greater than\n `n_samples / 2`.\n\nremaining_iterations : int, default=30\n Number of iterations to perform.\n According to [Rouseeuw1999]_, two iterations are sufficient to get\n close to the minimum, and we never need more than 30 to reach\n convergence.\n\ninitial_estimates : tuple of shape (2,), default=None\n Initial estimates of location and shape from which to run the c_step\n procedure:\n - initial_estimates[0]: an initial location estimate\n - initial_estimates[1]: an initial covariance estimate\n\nverbose : bool, default=False\n Verbose mode.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location estimates.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance estimates.\n\nsupport : ndarray of shape (n_samples,)\n A mask for the `n_support` observations whose scatter matrix has\n minimum determinant.\n\nReferences\n----------\n.. [Rouseeuw1999] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS"
- },
- {
- "name": "_c_step",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "select_candidates",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data (sub)set in which we look for the n_support purest observations."
- },
- {
- "name": "n_support",
- "type": "int",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples the pure data set must contain. This parameter must be in the range `[(n + p + 1)/2] < n_support < n`."
- },
- {
- "name": "n_trials",
- "type": "Union[Tuple[], int]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of different initial sets of observations from which to run the algorithm. This parameter should be a strictly positive integer. Instead of giving a number of trials to perform, one can provide a list of initial estimates that will be used to iteratively run c_step procedures. In this case: - n_trials[0]: array-like, shape (n_trials, n_features) is the list of `n_trials` initial location estimates - n_trials[1]: array-like, shape (n_trials, n_features, n_features) is the list of `n_trials` initial covariances estimates"
- },
- {
- "name": "select",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of best candidates results to return. This parameter must be a strictly positive integer."
- },
- {
- "name": "n_iter",
- "type": "int",
- "hasDefault": true,
- "default": "30",
- "limitation": null,
- "ignored": false,
- "docstring": "Maximum number of iterations for the c_step procedure. (2 is enough to be close to the final solution. \"Never\" exceeds 20). This parameter must be a strictly positive integer."
- },
- {
- "name": "verbose",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Control the output verbosity."
- },
- {
- "name": "cov_computation_method",
- "type": "Callable",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The function which will be used to compute the covariance. Must return an array of shape (n_features, n_features)."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Finds the best pure subset of observations to compute MCD from it.\n\nThe purpose of this function is to find the best sets of n_support\nobservations with respect to a minimization of their covariance\nmatrix determinant. Equivalently, it removes n_samples-n_support\nobservations to construct what we call a pure data set (i.e. not\ncontaining outliers). The list of the observations of the pure\ndata set is referred to as the `support`.\n\nStarting from a random support, the pure data set is found by the\nc_step procedure introduced by Rousseeuw and Van Driessen in\n[RV]_.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data (sub)set in which we look for the n_support purest observations.\n\nn_support : int\n The number of samples the pure data set must contain.\n This parameter must be in the range `[(n + p + 1)/2] < n_support < n`.\n\nn_trials : int or tuple of shape (2,)\n Number of different initial sets of observations from which to\n run the algorithm. This parameter should be a strictly positive\n integer.\n Instead of giving a number of trials to perform, one can provide a\n list of initial estimates that will be used to iteratively run\n c_step procedures. In this case:\n - n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n - n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates\n\nselect : int, default=1\n Number of best candidates results to return. This parameter must be\n a strictly positive integer.\n\nn_iter : int, default=30\n Maximum number of iterations for the c_step procedure.\n (2 is enough to be close to the final solution. 
\"Never\" exceeds 20).\n This parameter must be a strictly positive integer.\n\nverbose : bool, default=False\n Control the output verbosity.\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nSee Also\n---------\nc_step\n\nReturns\n-------\nbest_locations : ndarray of shape (select, n_features)\n The `select` location estimates computed from the `select` best\n supports found in the data set (`X`).\n\nbest_covariances : ndarray of shape (select, n_features, n_features)\n The `select` covariance estimates computed from the `select`\n best supports found in the data set (`X`).\n\nbest_supports : ndarray of shape (select, n_samples)\n The `select` best supports found in the data set (`X`).\n\nReferences\n----------\n.. [RV] A Fast Algorithm for the Minimum Covariance Determinant\n Estimator, 1999, American Statistical Association and the American\n Society for Quality, TECHNOMETRICS"
- },
- {
- "name": "fast_mcd",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The data matrix, with p features and n samples."
- },
- {
- "name": "support_fraction",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The proportion of points to be included in the support of the raw MCD estimate. Default is `None`, which implies that the minimum value of `support_fraction` will be used within the algorithm: `(n_sample + n_features + 1) / 2`. This parameter must be in the range (0, 1)."
- },
- {
- "name": "cov_computation_method",
- "type": "Callable",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The function which will be used to compute the covariance. Must return an array of shape (n_features, n_features)."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines the pseudo random number generator for shuffling the data. Pass an int for reproducible results across multiple function calls. See :term: `Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimates the Minimum Covariance Determinant matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data matrix, with p features and n samples.\n\nsupport_fraction : float, default=None\n The proportion of points to be included in the support of the raw\n MCD estimate. Default is `None`, which implies that the minimum\n value of `support_fraction` will be used within the algorithm:\n `(n_sample + n_features + 1) / 2`. This parameter must be in the\n range (0, 1).\n\ncov_computation_method : callable, default=:func:`sklearn.covariance.empirical_covariance`\n The function which will be used to compute the covariance.\n Must return an array of shape (n_features, n_features).\n\nrandom_state : int, RandomState instance or None, default=None\n Determines the pseudo random number generator for shuffling the data.\n Pass an int for reproducible results across multiple function calls.\n See :term: `Glossary `.\n\nReturns\n-------\nlocation : ndarray of shape (n_features,)\n Robust location of the data.\n\ncovariance : ndarray of shape (n_features, n_features)\n Robust covariance of the features.\n\nsupport : ndarray of shape (n_samples,), dtype=bool\n A mask of the observations that have been used to compute\n the robust location and covariance estimates of the data set.\n\nNotes\n-----\nThe FastMCD algorithm has been introduced by Rousseuw and Van Driessen\nin \"A Fast Algorithm for the Minimum Covariance Determinant Estimator,\n1999, American Statistical Association and the American Society\nfor Quality, TECHNOMETRICS\".\nThe principle is to compute robust estimates and random subsets before\npooling them into a larger subsets, and finally into the full data set.\nDepending on the size of the initial sample, we have one, two or three\nsuch computation levels.\n\nNote that only raw estimates are returned. 
If one is interested in\nthe correction and reweighting steps described in [RouseeuwVan]_,\nsee the MinCovDet object.\n\nReferences\n----------\n\n.. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance\n Determinant Estimator, 1999, American Statistical Association\n and the American Society for Quality, TECHNOMETRICS\n\n.. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,\n Asymptotics For The Minimum Covariance Determinant Estimator,\n The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400"
- }
- ]
- },
- {
- "name": "sklearn.covariance._shrunk_covariance",
- "imports": [
- "import warnings",
- "import numpy as np",
- "from None import empirical_covariance",
- "from None import EmpiricalCovariance",
- "from utils import check_array",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "ShrunkCovariance",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify if the estimated precision is stored"
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False, data will be centered before computation."
- },
- {
- "name": "shrinkage",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where n_samples is the number of samples and n_features is the number of features."
- },
- {
- "name": "y: Ignored",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the shrunk covariance model according to the given training data\nand parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where n_samples is the number of samples\n and n_features is the number of features.\n\ny: Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- }
- ],
- "docstring": "Covariance estimator with shrinkage\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data will be centered before computation.\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import ShrunkCovariance\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> cov = ShrunkCovariance().fit(X)\n>>> cov.covariance_\narray([[0.7387..., 0.2536...],\n [0.2536..., 0.4110...]])\n>>> cov.location_\narray([0.0622..., 0.0193...])\n\nNotes\n-----\nThe regularized covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features"
- },
- {
- "name": "LedoitWolf",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify if the estimated precision is stored."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation."
- },
- {
- "name": "block_size",
- "type": "int",
- "hasDefault": true,
- "default": "1000",
- "limitation": null,
- "ignored": false,
- "docstring": "Size of blocks into which the covariance matrix will be split during its Ledoit-Wolf estimation. This is purely a memory optimization and does not affect results."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the Ledoit-Wolf shrunk covariance model according to the given\ntraining data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- }
- ],
- "docstring": "LedoitWolf Estimator\n\nLedoit-Wolf is a particular form of shrinkage, where the shrinkage\ncoefficient is computed using O. Ledoit and M. Wolf's formula as\ndescribed in \"A Well-Conditioned Estimator for Large-Dimensional\nCovariance Matrices\", Ledoit and Wolf, Journal of Multivariate\nAnalysis, Volume 88, Issue 2, February 2004, pages 365-411.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split\n during its Ledoit-Wolf estimation. This is purely a memory\n optimization and does not affect results.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import LedoitWolf\n>>> real_cov = np.array([[.4, .2],\n... [.2, .8]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=50)\n>>> cov = LedoitWolf().fit(X)\n>>> cov.covariance_\narray([[0.4406..., 0.1616...],\n [0.1616..., 0.8022...]])\n>>> cov.location_\narray([ 0.0595... 
, -0.0075...])\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the Ledoit and Wolf formula (see References)\n\nReferences\n----------\n\"A Well-Conditioned Estimator for Large-Dimensional Covariance Matrices\",\nLedoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2,\nFebruary 2004, pages 365-411."
- },
- {
- "name": "OAS",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "store_precision",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify if the estimated precision is stored."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful when working with data whose mean is almost, but not exactly zero. If False (default), data will be centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training data, where `n_samples` is the number of samples and `n_features` is the number of features."
- },
- {
- "name": "y",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": true,
- "docstring": "Not used, present for API consistency by convention."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Fit the Oracle Approximating Shrinkage covariance model\naccording to the given training data and parameters.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data, where `n_samples` is the number of samples\n and `n_features` is the number of features.\ny : Ignored\n Not used, present for API consistency by convention.\n\nReturns\n-------\nself : object"
- }
- ],
- "docstring": "Oracle Approximating Shrinkage Estimator\n\nRead more in the :ref:`User Guide `.\n\nOAS is a particular form of shrinkage described in\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010.\n\nThe formula used here does not correspond to the one given in the\narticle. In the original article, formula (23) states that 2/p is\nmultiplied by Trace(cov*cov) in both the numerator and denominator, but\nthis operation is omitted because for a large p, the value of 2/p is\nso small that it doesn't affect the value of the estimator.\n\nParameters\n----------\nstore_precision : bool, default=True\n Specify if the estimated precision is stored.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False (default), data will be centered before computation.\n\nAttributes\n----------\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n (stored only if store_precision is True)\n\nshrinkage_ : float\n coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import OAS\n>>> from sklearn.datasets import make_gaussian_quantiles\n>>> real_cov = np.array([[.8, .3],\n... [.3, .4]])\n>>> rng = np.random.RandomState(0)\n>>> X = rng.multivariate_normal(mean=[0, 0],\n... cov=real_cov,\n... size=500)\n>>> oas = OAS().fit(X)\n>>> oas.covariance_\narray([[0.7533..., 0.2763...],\n [0.2763..., 0.3964...]])\n>>> oas.precision_\narray([[ 1.7833..., -1.2431... 
],\n [-1.2431..., 3.3889...]])\n>>> oas.shrinkage_\n0.0195...\n\nNotes\n-----\nThe regularised covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\nand shrinkage is given by the OAS formula (see References)\n\nReferences\n----------\n\"Shrinkage Algorithms for MMSE Covariance Estimation\"\nChen et al., IEEE Trans. on Sign. Proc., Volume 58, Issue 10, October 2010."
- }
- ],
- "functions": [
- {
- "name": "shrunk_covariance",
- "decorators": [],
- "parameters": [
- {
- "name": "emp_cov",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Covariance matrix to be shrunk"
- },
- {
- "name": "shrinkage",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Coefficient in the convex combination used for the computation of the shrunk estimate. Range is [0, 1]."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculates a covariance matrix shrunk on the diagonal\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nemp_cov : array-like of shape (n_features, n_features)\n Covariance matrix to be shrunk\n\nshrinkage : float, default=0.1\n Coefficient in the convex combination used for the computation\n of the shrunk estimate. Range is [0, 1].\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nNotes\n-----\nThe regularized (shrunk) covariance is given by:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features"
- },
- {
- "name": "ledoit_wolf_shrinkage",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation."
- },
- {
- "name": "block_size",
- "type": "int",
- "hasDefault": true,
- "default": "1000",
- "limitation": null,
- "ignored": false,
- "docstring": "Size of blocks into which the covariance matrix will be split."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the Ledoit-Wolf shrunk covariance shrinkage.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n\nReturns\n-------\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features"
- },
- {
- "name": "ledoit_wolf",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate"
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation."
- },
- {
- "name": "block_size",
- "type": "int",
- "hasDefault": true,
- "default": "1000",
- "limitation": null,
- "ignored": false,
- "docstring": "Size of blocks into which the covariance matrix will be split. This is purely a memory optimization and does not affect results."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimates the shrunk Ledoit-Wolf covariance matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nblock_size : int, default=1000\n Size of blocks into which the covariance matrix will be split.\n This is purely a memory optimization and does not affect results.\n\nReturns\n-------\nshrunk_cov : ndarray of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularized (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features"
- },
- {
- "name": "oas",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Data from which to compute the covariance estimate."
- },
- {
- "name": "assume_centered",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, data will not be centered before computation. Useful to work with data whose mean is significantly equal to zero but is not exactly zero. If False, data will be centered before computation."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Estimate covariance with the Oracle Approximating Shrinkage algorithm.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Data from which to compute the covariance estimate.\n\nassume_centered : bool, default=False\n If True, data will not be centered before computation.\n Useful to work with data whose mean is significantly equal to\n zero but is not exactly zero.\n If False, data will be centered before computation.\n\nReturns\n-------\nshrunk_cov : array-like of shape (n_features, n_features)\n Shrunk covariance.\n\nshrinkage : float\n Coefficient in the convex combination used for the computation\n of the shrunk estimate.\n\nNotes\n-----\nThe regularised (shrunk) covariance is:\n\n(1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)\n\nwhere mu = trace(cov) / n_features\n\nThe formula we used to implement the OAS is slightly modified compared\nto the one given in the article. See :class:`OAS` for more details."
- }
- ]
- },
- {
- "name": "sklearn.covariance",
- "imports": [
- "from _empirical_covariance import empirical_covariance",
- "from _empirical_covariance import EmpiricalCovariance",
- "from _empirical_covariance import log_likelihood",
- "from _shrunk_covariance import shrunk_covariance",
- "from _shrunk_covariance import ShrunkCovariance",
- "from _shrunk_covariance import ledoit_wolf",
- "from _shrunk_covariance import ledoit_wolf_shrinkage",
- "from _shrunk_covariance import LedoitWolf",
- "from _shrunk_covariance import oas",
- "from _shrunk_covariance import OAS",
- "from _robust_covariance import fast_mcd",
- "from _robust_covariance import MinCovDet",
- "from _graph_lasso import graphical_lasso",
- "from _graph_lasso import GraphicalLasso",
- "from _graph_lasso import GraphicalLassoCV",
- "from _elliptic_envelope import EllipticEnvelope"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.covariance.tests.test_covariance",
- "imports": [
- "import numpy as np",
- "import pytest",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.utils._testing import assert_warns",
- "from sklearn import datasets",
- "from sklearn.covariance import empirical_covariance",
- "from sklearn.covariance import EmpiricalCovariance",
- "from sklearn.covariance import ShrunkCovariance",
- "from sklearn.covariance import shrunk_covariance",
- "from sklearn.covariance import LedoitWolf",
- "from sklearn.covariance import ledoit_wolf",
- "from sklearn.covariance import ledoit_wolf_shrinkage",
- "from sklearn.covariance import OAS",
- "from sklearn.covariance import oas"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_covariance",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_shrunk_covariance",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ledoit_wolf",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_naive_ledoit_wolf_shrinkage",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ledoit_wolf_small",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_ledoit_wolf_large",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_oas",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.covariance.tests.test_elliptic_envelope",
- "imports": [
- "import numpy as np",
- "import pytest",
- "from sklearn.covariance import EllipticEnvelope",
- "from sklearn.utils._testing import assert_almost_equal",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_array_equal",
- "from sklearn.exceptions import NotFittedError"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_elliptic_envelope",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_score_samples",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.covariance.tests.test_graphical_lasso",
- "imports": [
- "import sys",
- "import pytest",
- "import numpy as np",
- "from scipy import linalg",
- "from numpy.testing import assert_allclose",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_array_less",
- "from sklearn.covariance import graphical_lasso",
- "from sklearn.covariance import GraphicalLasso",
- "from sklearn.covariance import GraphicalLassoCV",
- "from sklearn.covariance import empirical_covariance",
- "from sklearn.datasets import make_sparse_spd_matrix",
- "from io import StringIO",
- "from sklearn.utils import check_random_state",
- "from sklearn import datasets"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_graphical_lasso",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graphical_lasso_iris",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graph_lasso_2D",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graphical_lasso_iris_singular",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graphical_lasso_cv",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graphical_lasso_cv_grid_scores_and_cv_alphas_deprecated",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_graphical_lasso_cv_scores",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.covariance.tests.test_robust_covariance",
- "imports": [
- "import itertools",
- "import numpy as np",
- "from sklearn.utils._testing import assert_array_almost_equal",
- "from sklearn.utils._testing import assert_raise_message",
- "from sklearn.utils._testing import assert_warns_message",
- "from sklearn import datasets",
- "from sklearn.covariance import empirical_covariance",
- "from sklearn.covariance import MinCovDet",
- "from sklearn.covariance import fast_mcd"
- ],
- "classes": [],
- "functions": [
- {
- "name": "test_mcd",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_fast_mcd_on_invalid_input",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mcd_class_on_invalid_input",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "launch_mcd_on_dataset",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mcd_issue1127",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mcd_issue3367",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mcd_support_covariance_is_zero",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mcd_increasing_det_warning",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.covariance.tests",
- "imports": [],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.cross_decomposition._pls",
- "imports": [
- "import warnings",
- "from abc import ABCMeta",
- "from abc import abstractmethod",
- "import numpy as np",
- "from scipy.linalg import pinv2",
- "from scipy.linalg import svd",
- "from base import BaseEstimator",
- "from base import RegressorMixin",
- "from base import TransformerMixin",
- "from base import MultiOutputMixin",
- "from utils import check_array",
- "from utils import check_consistent_length",
- "from utils.extmath import svd_flip",
- "from utils.validation import check_is_fitted",
- "from utils.validation import FLOAT_DTYPES",
- "from utils.validation import _deprecate_positional_args",
- "from exceptions import ConvergenceWarning",
- "from utils.deprecation import deprecated"
- ],
- "classes": [
- {
- "name": "_PLS",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where `n_samples` is the number of samples and `n_features` is the number of predictors."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target vectors, where `n_samples` is the number of samples and `n_targets` is the number of response variables."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where `n_samples` is the number of samples and\n `n_features` is the number of predictors.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target vectors, where `n_samples` is the number of samples and\n `n_targets` is the number of response variables."
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples to transform."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target vectors."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Apply the dimension reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to transform.\n\nY : array-like of shape (n_samples, n_targets), default=None\n Target vectors.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nReturns\n-------\n`x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise."
- },
- {
- "name": "inverse_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "New data, where `n_samples` is the number of samples and `n_components` is the number of pls components."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Transform data back to its original space.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_components)\n New data, where `n_samples` is the number of samples\n and `n_components` is the number of pls components.\n\nReturns\n-------\nx_reconstructed : array-like of shape (n_samples, n_features)\n\nNotes\n-----\nThis transformation will only be exact if `n_components=n_features`."
- },
- {
- "name": "predict",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y`, or perform in-place normalization."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Predict targets of given samples.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y`, or perform in-place normalization.\n\nNotes\n-----\nThis call requires the estimation of a matrix of shape\n`(n_features, n_targets)`, which may be an issue in high dimensional\nspace."
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training vectors, where n_samples is the number of samples and n_features is the number of predictors."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Target vectors, where n_samples is the number of samples and n_targets is the number of response variables."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Learn and apply the dimension reduction on the train data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vectors, where n_samples is the number of samples and\n n_features is the number of predictors.\n\ny : array-like of shape (n_samples, n_targets), default=None\n Target vectors, where n_samples is the number of samples and\n n_targets is the number of response variables.\n\nReturns\n-------\nx_scores if Y is not given, (x_scores, y_scores) otherwise."
- },
- {
- "name": "norm_y_weights",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "x_mean_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_mean_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "x_std_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_std_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "x_scores_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_scores_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_more_tags",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Partial Least Squares (PLS)\n\nThis class implements the generic PLS algorithm.\n\nMain ref: Wegelin, a survey of Partial Least Squares (PLS) methods,\nwith emphasis on the two-block case\nhttps://www.stat.washington.edu/research/reports/2000/tr371.pdf"
- },
- {
- "name": "PLSRegression",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`."
- },
- {
- "name": "scale",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to scale `X` and `Y`."
- },
- {
- "name": "algorithm",
- "type": "Literal['nipals', 'svd']",
- "hasDefault": true,
- "default": "'nipals'",
- "limitation": null,
- "ignored": false,
- "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "500",
- "limitation": null,
- "ignored": false,
- "docstring": "The maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-06",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "PLS regression\n\nPLSRegression is also known as PLS2 or PLS1, depending on the number of\ntargets.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n The maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. 
If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSRegression\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> pls2 = PLSRegression(n_components=2)\n>>> pls2.fit(X, Y)\nPLSRegression()\n>>> Y_pred = pls2.predict(X)"
- },
- {
- "name": "PLSCanonical",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`."
- },
- {
- "name": "scale",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to scale `X` and `Y`."
- },
- {
- "name": "algorithm",
- "type": "Literal['nipals', 'svd']",
- "hasDefault": true,
- "default": "'nipals'",
- "limitation": null,
- "ignored": false,
- "docstring": "The algorithm used to estimate the first singular vectors of the cross-covariance matrix. 'nipals' uses the power method while 'svd' will compute the whole SVD."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "500",
- "limitation": null,
- "ignored": false,
- "docstring": "the maximum number of iterations of the power method when `algorithm='nipals'`. Ignored otherwise."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-06",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Partial Least Squares transformer and regressor.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nalgorithm : {'nipals', 'svd'}, default='nipals'\n The algorithm used to estimate the first singular vectors of the\n cross-covariance matrix. 'nipals' uses the power method while 'svd'\n will compute the whole SVD.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method when\n `algorithm='nipals'`. Ignored otherwise.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. 
deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component. Empty if `algorithm='svd'`.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import PLSCanonical\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> plsca = PLSCanonical(n_components=2)\n>>> plsca.fit(X, Y)\nPLSCanonical()\n>>> X_c, Y_c = plsca.transform(X, Y)\n\nSee Also\n--------\nCCA\nPLSSVD"
- },
- {
- "name": "CCA",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "Number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`."
- },
- {
- "name": "scale",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to scale `X` and `Y`."
- },
- {
- "name": "max_iter",
- "type": "int",
- "hasDefault": true,
- "default": "500",
- "limitation": null,
- "ignored": false,
- "docstring": "the maximum number of iterations of the power method."
- },
- {
- "name": "tol",
- "type": "float",
- "hasDefault": true,
- "default": "1e-06",
- "limitation": null,
- "ignored": false,
- "docstring": "The tolerance used as convergence criteria in the power method: the algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less than `tol`, where `u` corresponds to the left singular vector."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ],
- "docstring": "Canonical Correlation Analysis, also known as \"Mode B\" PLS.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=2\n Number of components to keep. Should be in `[1, min(n_samples,\n n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\nmax_iter : int, default=500\n the maximum number of iterations of the power method.\n\ntol : float, default=1e-06\n The tolerance used as convergence criteria in the power method: the\n algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less\n than `tol`, where `u` corresponds to the left singular vector.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the cross-covariance matrices of each\n iteration.\n\ny_weights_ : ndarray of shape (n_targets, n_components)\n The right singular vectors of the cross-covariance matrices of each\n iteration.\n\nx_loadings_ : ndarray of shape (n_features, n_components)\n The loadings of `X`.\n\ny_loadings_ : ndarray of shape (n_targets, n_components)\n The loadings of `Y`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). 
You can just call `transform` on the training\n data instead.\n\nx_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `X`.\n\ny_rotations_ : ndarray of shape (n_features, n_components)\n The projection matrix used to transform `Y`.\n\ncoef_ : ndarray of shape (n_features, n_targets)\n The coefficients of the linear model such that `Y` is approximated as\n `Y = X @ coef_`.\n\nn_iter_ : list of shape (n_components,)\n Number of iterations of the power method, for each\n component.\n\nExamples\n--------\n>>> from sklearn.cross_decomposition import CCA\n>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]\n>>> Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]\n>>> cca = CCA(n_components=1)\n>>> cca.fit(X, Y)\nCCA(n_components=1)\n>>> X_c, Y_c = cca.transform(X, Y)\n\nSee Also\n--------\nPLSCanonical\nPLSSVD"
- },
- {
- "name": "PLSSVD",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [
- {
- "name": "n_components",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of components to keep. Should be in `[1, min(n_samples, n_features, n_targets)]`."
- },
- {
- "name": "scale",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to scale `X` and `Y`."
- },
- {
- "name": "copy",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to copy `X` and `Y` in fit before applying centering, and potentially scaling. If False, these operations will be done inplace, modifying both arrays."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fit",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training samples."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fit model to data.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets)\n Targets."
- },
- {
- "name": "x_scores_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_scores_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "x_mean_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_mean_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "x_std_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "y_std_",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Samples to be transformed."
- },
- {
- "name": "Y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Samples to be transformed.\n\nY : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise."
- },
- {
- "name": "fit_transform",
- "decorators": [],
- "parameters": [
- {
- "name": "X",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Training samples."
- },
- {
- "name": "y",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Targets."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Learn and apply the dimensionality reduction.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training samples.\n\ny : array-like of shape (n_samples,) or (n_samples, n_targets), default=None\n Targets.\n\nReturns\n-------\nout : array-like or tuple of array-like\n The transformed data `X_tranformed` if `Y` is not None,\n `(X_transformed, Y_transformed)` otherwise."
- }
- ],
- "docstring": "Partial Least Square SVD.\n\nThis transformer simply performs a SVD on the crosscovariance matrix X'Y.\nIt is able to project both the training data `X` and the targets `Y`. The\ntraining data X is projected on the left singular vectors, while the\ntargets are projected on the right singular vectors.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.8\n\nParameters\n----------\nn_components : int, default=2\n The number of components to keep. Should be in `[1,\n min(n_samples, n_features, n_targets)]`.\n\nscale : bool, default=True\n Whether to scale `X` and `Y`.\n\ncopy : bool, default=True\n Whether to copy `X` and `Y` in fit before applying centering, and\n potentially scaling. If False, these operations will be done inplace,\n modifying both arrays.\n\nAttributes\n----------\nx_weights_ : ndarray of shape (n_features, n_components)\n The left singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\ny_weights_ : ndarray of (n_targets, n_components)\n The right singular vectors of the SVD of the cross-covariance matrix.\n Used to project `X` in `transform`.\n\nx_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training samples.\n\n .. deprecated:: 0.24\n `x_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\ny_scores_ : ndarray of shape (n_samples, n_components)\n The transformed training targets.\n\n .. deprecated:: 0.24\n `y_scores_` is deprecated in 0.24 and will be removed in 1.1\n (renaming of 0.26). You can just call `transform` on the training\n data instead.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.cross_decomposition import PLSSVD\n>>> X = np.array([[0., 0., 1.],\n... [1., 0., 0.],\n... [2., 2., 2.],\n... [2., 5., 4.]])\n>>> Y = np.array([[0.1, -0.2],\n... [0.9, 1.1],\n... [6.2, 5.9],\n... 
[11.9, 12.3]])\n>>> pls = PLSSVD(n_components=2).fit(X, Y)\n>>> X_c, Y_c = pls.transform(X, Y)\n>>> X_c.shape, Y_c.shape\n((4, 2), (4, 2))\n\nSee Also\n--------\nPLSCanonical\nCCA"
- }
- ],
- "functions": [
- {
- "name": "_get_first_singular_vectors_power_method",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the first left and right singular vectors of X'Y.\n\nProvides an alternative to the svd(X'Y) and uses the power method instead.\nWith norm_y_weights to True and in mode A, this corresponds to the\nalgorithm section 11.3 of the Wegelin's review, except this starts at the\n\"update saliences\" part."
- },
- {
- "name": "_get_first_singular_vectors_svd",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the first left and right singular vectors of X'Y.\n\nHere the whole SVD is computed."
- },
- {
- "name": "_center_scale_xy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Center X, Y and scale if the scale parameter==True\n\nReturns\n-------\n X, Y, x_mean, y_mean, x_std, y_std"
- },
- {
- "name": "_svd_flip_1d",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Same as svd_flip but works on 1d arrays, and is inplace"
- }
- ]
- },
- {
- "name": "sklearn.cross_decomposition",
- "imports": [
- "from _pls import PLSCanonical",
- "from _pls import PLSRegression",
- "from _pls import PLSSVD",
- "from _pls import CCA"
- ],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.cross_decomposition.tests.test_pls",
- "imports": [
- "import pytest",
- "import numpy as np",
- "from numpy.testing import assert_array_almost_equal",
- "from numpy.testing import assert_array_equal",
- "from numpy.testing import assert_allclose",
- "from sklearn.datasets import load_linnerud",
- "from sklearn.cross_decomposition._pls import _center_scale_xy",
- "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_power_method",
- "from sklearn.cross_decomposition._pls import _get_first_singular_vectors_svd",
- "from sklearn.cross_decomposition._pls import _svd_flip_1d",
- "from sklearn.cross_decomposition import CCA",
- "from sklearn.cross_decomposition import PLSSVD",
- "from sklearn.cross_decomposition import PLSRegression",
- "from sklearn.cross_decomposition import PLSCanonical",
- "from sklearn.datasets import make_regression",
- "from sklearn.utils import check_random_state",
- "from sklearn.utils.extmath import svd_flip",
- "from sklearn.exceptions import ConvergenceWarning"
- ],
- "classes": [],
- "functions": [
- {
- "name": "assert_matrix_orthogonal",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_pls_canonical_basics",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sanity_check_pls_regression",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sanity_check_pls_regression_constant_column_Y",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sanity_check_pls_canonical",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_sanity_check_pls_canonical_random",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_convergence_fail",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_attibutes_shapes",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_univariate_equivalence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_copy",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_generate_test_scale_and_stability_datasets",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate dataset for test_scale_and_stability"
- },
- {
- "name": "test_scale_and_stability",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "scale=True is equivalent to scale=False on centered/scaled data\nThis allows to check numerical stability over platforms as well"
- },
- {
- "name": "test_n_components_bounds",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_n_components_bounds_pls_regression",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_scores_deprecations",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_norm_y_weights_deprecation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_mean_and_std_deprecation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_singular_value_helpers",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_one_component_equivalence",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "test_svd_flip_1d",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.cross_decomposition.tests",
- "imports": [],
- "classes": [],
- "functions": []
- },
- {
- "name": "sklearn.datasets.setup",
- "imports": [
- "import numpy",
- "import os",
- "import platform",
- "from numpy.distutils.misc_util import Configuration",
- "from numpy.distutils.core import setup"
- ],
- "classes": [],
- "functions": [
- {
- "name": "configuration",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- }
- ]
- },
- {
- "name": "sklearn.datasets._base",
- "imports": [
- "import csv",
- "import hashlib",
- "import os",
- "import shutil",
- "from collections import namedtuple",
- "from os import environ",
- "from os import listdir",
- "from os import makedirs",
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import expanduser",
- "from os.path import isdir",
- "from os.path import join",
- "from os.path import splitext",
- "from utils import Bunch",
- "from utils import check_random_state",
- "from utils import check_pandas_support",
- "from utils.validation import _deprecate_positional_args",
- "import numpy as np",
- "from urllib.request import urlretrieve",
- "from externals._pilutil import imread"
- ],
- "classes": [],
- "functions": [
- {
- "name": "get_data_home",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return the path of the scikit-learn data dir.\n\nThis folder is used by some large dataset loaders to avoid downloading the\ndata several times.\n\nBy default the data dir is set to a folder named 'scikit_learn_data' in the\nuser home folder.\n\nAlternatively, it can be set by the 'SCIKIT_LEARN_DATA' environment\nvariable or programmatically by giving an explicit folder path. The '~'\nsymbol is expanded to the user home folder.\n\nIf the folder does not already exist, it is automatically created.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`."
- },
- {
- "name": "clear_data_home",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The path to scikit-learn data directory. If `None`, the default path is `~/sklearn_learn_data`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Delete all the content of the data home cache.\n\nParameters\n----------\ndata_home : str, default=None\n The path to scikit-learn data directory. If `None`, the default path\n is `~/sklearn_learn_data`."
- },
- {
- "name": "_convert_data_dataframe",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "load_files",
- "decorators": [],
- "parameters": [
- {
- "name": "container_path",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Path to the main folder holding one subfolder per category"
- },
- {
- "name": "description",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A paragraph describing the characteristic of the dataset: its source, reference, etc."
- },
- {
- "name": "categories",
- "type": "List[str]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If None (default), load all the categories. If not None, list of category names to load (other categories ignored)."
- },
- {
- "name": "load_content",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to load or not the content of the different files. If true a 'data' attribute containing the text information is present in the data structure returned. If not, a filenames attribute gives the path to the files."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether or not to shuffle the data: might be important for models that make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent."
- },
- {
- "name": "encoding",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If None, do not try to decode the content of the files (e.g. for images or other non-text content). If not None, encoding to use to decode text files to Unicode if load_content is True."
- },
- {
- "name": "decode_error",
- "type": "Literal['strict', 'ignore', 'replace']",
- "hasDefault": true,
- "default": "'strict'",
- "limitation": null,
- "ignored": false,
- "docstring": "Instruction on what to do if a byte sequence is given to analyze that contains characters not of the given `encoding`. Passed as keyword argument 'errors' to bytes.decode."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncontainer_path : str or unicode\n Path to the main folder holding one subfolder per category\n\ndescription : str or unicode, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\ncategories : list of str, default=None\n If None (default), load all the categories. If not None, list of\n category names to load (other categories ignored).\n\nload_content : bool, default=True\n Whether to load or not the content of the different files. If true a\n 'data' attribute containing the text information is present in the data\n structure returned. 
If not, a filenames attribute gives the path to the\n files.\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nencoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset."
- },
- {
- "name": "load_data",
- "decorators": [],
- "parameters": [
- {
- "name": "module_path",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The module path."
- },
- {
- "name": "data_file_name",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Name of csv file to be loaded from module_path/data/data_file_name. For example 'wine_data.csv'."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Loads data from module_path/data/data_file_name.\n\nParameters\n----------\nmodule_path : string\n The module path.\n\ndata_file_name : string\n Name of csv file to be loaded from\n module_path/data/data_file_name. For example 'wine_data.csv'.\n\nReturns\n-------\ndata : Numpy array\n A 2D array with each row representing one sample and each column\n representing the features of a given sample.\n\ntarget : Numpy array\n A 1D array holding target variables for all the samples in `data.\n For example target[0] is the target varible for data[0].\n\ntarget_names : Numpy array\n A 1D array containing the names of the classifications. For example\n target_names[0] is the name of the target[0] class."
- },
- {
- "name": "load_wine",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object."
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the wine dataset (classification).\n\n.. versionadded:: 0.18\n\nThe wine dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class [59,71,48]\nSamples total 178\nDimensionality 13\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (178, 13)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (178,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (178, 14)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. 
versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\nThe copy of UCI ML Wine Data Set dataset is downloaded and modified to fit\nstandard format from:\nhttps://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n\nExamples\n--------\nLet's say you are interested in the samples 10, 80, and 140, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_wine\n>>> data = load_wine()\n>>> data.target[[10, 80, 140]]\narray([0, 1, 2])\n>>> list(data.target_names)\n['class_0', 'class_1', 'class_2']"
- },
- {
- "name": "load_iris",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the iris dataset (classification).\n\nThe iris dataset is a classic and very easy multi-class classification\ndataset.\n\n================= ==============\nClasses 3\nSamples per class 50\nSamples total 150\nDimensionality 4\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (150, 4)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (150,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (150, 5)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. 
versionchanged:: 0.20\n Fixed two wrong data points according to Fisher's paper.\n The new version is the same as in R, but not as in the UCI\n Machine Learning Repository.\n\nExamples\n--------\nLet's say you are interested in the samples 10, 25, and 50, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_iris\n>>> data = load_iris()\n>>> data.target[[10, 25, 50]]\narray([0, 0, 1])\n>>> list(data.target_names)\n['setosa', 'versicolor', 'virginica']"
- },
- {
- "name": "load_breast_cancer",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the breast cancer wisconsin dataset (classification).\n\nThe breast cancer dataset is a classic and very easy binary classification\ndataset.\n\n================= ==============\nClasses 2\nSamples per class 212(M),357(B)\nSamples total 569\nDimensionality 30\nFeatures real, positive\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (569, 30)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (569,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n frame: DataFrame of shape (569, 31)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n filename: str\n The path to the location of the data.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.18\n\nThe copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is\ndownloaded from:\nhttps://goo.gl/U2Uwz2\n\nExamples\n--------\nLet's say you are interested in the samples 10, 50, and 85, and want to\nknow their class name.\n\n>>> from sklearn.datasets import load_breast_cancer\n>>> data = load_breast_cancer()\n>>> data.target[[10, 50, 85]]\narray([0, 1, 0])\n>>> list(data.target_names)\n['malignant', 'benign']"
- },
- {
- "name": "load_digits",
- "decorators": [],
- "parameters": [
- {
- "name": "n_class",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of classes to return. Between 0 and 10."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the digits dataset (classification).\n\nEach datapoint is a 8x8 image of a digit.\n\n================= ==============\nClasses 10\nSamples per class ~180\nSamples total 1797\nDimensionality 64\nFeatures integers 0-16\n================= ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_class : int, default=10\n The number of classes to return. Between 0 and 10.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (1797, 64)\n The flattened data matrix. If `as_frame=True`, `data` will be\n a pandas DataFrame.\n target: {ndarray, Series} of shape (1797,)\n The classification target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of target classes.\n\n .. versionadded:: 0.20\n\n frame: DataFrame of shape (1797, 65)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n images: {ndarray} of shape (1797, 8, 8)\n The raw image data.\n DESCR: str\n The full description of the dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
versionadded:: 0.18\n\nThis is a copy of the test set of the UCI ML hand-written digits datasets\nhttps://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits\n\nExamples\n--------\nTo load the data and visualize the images::\n\n >>> from sklearn.datasets import load_digits\n >>> digits = load_digits()\n >>> print(digits.data.shape)\n (1797, 64)\n >>> import matplotlib.pyplot as plt #doctest: +SKIP\n >>> plt.gray() #doctest: +SKIP\n >>> plt.matshow(digits.images[0]) #doctest: +SKIP\n >>> plt.show() #doctest: +SKIP"
- },
- {
- "name": "load_diabetes",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False.\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18"
- },
- {
- "name": "load_linnerud",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the physical excercise linnerud dataset.\n\nThis dataset is suitable for multi-ouput regression tasks.\n\n============== ============================\nSamples total 20\nDimensionality 3 (for both data and target)\nFeatures integer\nTargets integer\n============== ============================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (20, 3)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, dataframe} of shape (20, 3)\n The regression targets. If `as_frame=True`, `target` will be\n a pandas DataFrame.\n feature_names: list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n frame: DataFrame of shape (20, 6)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n .. versionadded:: 0.20\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18"
- },
- {
- "name": "load_boston",
- "decorators": [],
- "parameters": [
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.18"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load and return the boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506, )\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\nExamples\n--------\n>>> from sklearn.datasets import load_boston\n>>> X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)"
- },
- {
- "name": "load_sample_images",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Load sample images for image manipulation.\n\nLoads both, ``china`` and ``flower``.\n\nRead more in the :ref:`User Guide `.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n images : list of ndarray of shape (427, 640, 3)\n The two sample image.\n filenames : list\n The filenames for the images.\n DESCR : str\n The full description of the dataset.\n\nExamples\n--------\nTo load the data and visualize the images:\n\n>>> from sklearn.datasets import load_sample_images\n>>> dataset = load_sample_images() #doctest: +SKIP\n>>> len(dataset.images) #doctest: +SKIP\n2\n>>> first_img_data = dataset.images[0] #doctest: +SKIP\n>>> first_img_data.shape #doctest: +SKIP\n(427, 640, 3)\n>>> first_img_data.dtype #doctest: +SKIP\ndtype('uint8')"
- },
- {
- "name": "load_sample_image",
- "decorators": [],
- "parameters": [
- {
- "name": "image_name",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The name of the sample image loaded"
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Load the numpy array of a single sample image\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nimage_name : {`china.jpg`, `flower.jpg`}\n The name of the sample image loaded\n\nReturns\n-------\nimg : 3D array\n The image as a numpy array: height x width x color\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_sample_image\n>>> china = load_sample_image('china.jpg') # doctest: +SKIP\n>>> china.dtype # doctest: +SKIP\ndtype('uint8')\n>>> china.shape # doctest: +SKIP\n(427, 640, 3)\n>>> flower = load_sample_image('flower.jpg') # doctest: +SKIP\n>>> flower.dtype # doctest: +SKIP\ndtype('uint8')\n>>> flower.shape # doctest: +SKIP\n(427, 640, 3)"
- },
- {
- "name": "_pkl_filepath",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Return filename for Python 3 pickles\n\nargs[-1] is expected to be the \".pkl\" filename. For compatibility with\nolder scikit-learn versions, a suffix is inserted before the extension.\n\n_pkl_filepath('/path/to/folder', 'filename.pkl') returns\n'/path/to/folder/filename_py3.pkl'"
- },
- {
- "name": "_sha256",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Calculate the sha256 hash of the file at path."
- },
- {
- "name": "_fetch_remote",
- "decorators": [],
- "parameters": [
- {
- "name": "remote",
- "type": null,
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Named tuple containing remote dataset meta information: url, filename and checksum"
- },
- {
- "name": "dirname",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Directory to save the file to."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Helper function to download a remote dataset into path\n\nFetch a dataset pointed by remote's url, save into path using remote's\nfilename and ensure its integrity based on the SHA256 Checksum of the\ndownloaded file.\n\nParameters\n----------\nremote : RemoteFileMetadata\n Named tuple containing remote dataset meta information: url, filename\n and checksum\n\ndirname : string\n Directory to save the file to.\n\nReturns\n-------\nfile_path: string\n Full path of the created file."
- }
- ]
- },
- {
- "name": "sklearn.datasets._california_housing",
- "imports": [
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import join",
- "from os import makedirs",
- "from os import remove",
- "import tarfile",
- "import numpy as np",
- "import logging",
- "import joblib",
- "from None import get_data_home",
- "from _base import _convert_data_dataframe",
- "from _base import _fetch_remote",
- "from _base import _pkl_filepath",
- "from _base import RemoteFileMetadata",
- "from utils import Bunch",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "fetch_california_housing",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target_columns. .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the California housing dataset (regression).\n\n============== ==============\nSamples total 20640\nDimensionality 8\nFeatures real\nTarget real 0.15 - 5.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\n\nreturn_X_y : bool, default=False.\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n\n .. versionadded:: 0.23\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray, shape (20640, 8)\n Each row corresponding to the 8 feature values in order.\n If ``as_frame`` is True, ``data`` is a pandas object.\n target : numpy array of shape (20640,)\n Each value corresponds to the average\n house value in units of 100,000.\n If ``as_frame`` is True, ``target`` is a pandas object.\n feature_names : list of length 8\n Array of ordered feature names used in the dataset.\n DESCR : string\n Description of the California housing dataset.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n .. versionadded:: 0.23\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20\n\nNotes\n-----\n\nThis dataset consists of 20,640 samples and 9 features."
- }
- ]
- },
- {
- "name": "sklearn.datasets._covtype",
- "imports": [
- "from gzip import GzipFile",
- "import logging",
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import join",
- "from os import remove",
- "from os import makedirs",
- "import numpy as np",
- "import joblib",
- "from None import get_data_home",
- "from _base import _convert_data_dataframe",
- "from _base import _fetch_remote",
- "from _base import RemoteFileMetadata",
- "from utils import Bunch",
- "from _base import _pkl_filepath",
- "from utils import check_random_state",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "fetch_covtype",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to shuffle dataset."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data.data, data.target)`` instead of a Bunch object. .. versionadded:: 0.20"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric). The target is a pandas DataFrame or Series depending on the number of target columns. If `return_X_y` is True, then (`data`, `target`) will be pandas DataFrames or Series as described below. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the covertype dataset (classification).\n\nDownload it if necessary.\n\n================= ============\nClasses 7\nSamples total 581012\nDimensionality 54\nFeatures int\n================= ============\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data.data, data.target)`` instead of a Bunch\n object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is a pandas DataFrame or\n Series depending on the number of target columns. If `return_X_y` is\n True, then (`data`, `target`) will be pandas DataFrames or Series as\n described below.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (581012, 54)\n Each row corresponds to the 54 features in the dataset.\n target : ndarray of shape (581012,)\n Each value corresponds to one of\n the 7 forest covertypes with values\n ranging between 1 to 7.\n frame : dataframe of shape (581012, 53)\n Only present when `as_frame=True`. 
Contains `data` and `target`.\n DESCR : str\n Description of the forest covertype dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20"
- }
- ]
- },
- {
- "name": "sklearn.datasets._kddcup99",
- "imports": [
- "import errno",
- "from gzip import GzipFile",
- "import logging",
- "import os",
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import join",
- "import numpy as np",
- "import joblib",
- "from _base import _fetch_remote",
- "from _base import _convert_data_dataframe",
- "from None import get_data_home",
- "from _base import RemoteFileMetadata",
- "from utils import Bunch",
- "from utils import check_random_state",
- "from utils import shuffle as shuffle_method",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "fetch_kddcup99",
- "decorators": [],
- "parameters": [
- {
- "name": "subset",
- "type": "Literal['SA', 'SF', 'http', 'smtp']",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "To return the corresponding classical subsets of kddcup 99. If None, return the entire kddcup 99 dataset."
- },
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. .. versionadded:: 0.19"
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to shuffle dataset."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling and for selection of abnormal samples if `subset='SA'`. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "percent10",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to load only 10 percent of the data."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` object. .. versionadded:: 0.20"
- },
- {
- "name": "as_frame",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If `True`, returns a pandas Dataframe for the ``data`` and ``target`` objects in the `Bunch` returned object; `Bunch` return object will also have a ``frame`` member. .. versionadded:: 0.24"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the kddcup99 dataset (classification).\n\nDownload it if necessary.\n\n================= ====================================\nClasses 23\nSamples total 4898431\nDimensionality 41\nFeatures discrete (int) or continuous (float)\n================= ====================================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nsubset : {'SA', 'SF', 'http', 'smtp'}, default=None\n To return the corresponding classical subsets of kddcup 99.\n If None, return the entire kddcup 99 dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n .. versionadded:: 0.19\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and for\n selection of abnormal samples if `subset='SA'`. Pass an int for\n reproducible output across multiple function calls.\n See :term:`Glossary `.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.20\n\nas_frame : bool, default=False\n If `True`, returns a pandas Dataframe for the ``data`` and ``target``\n objects in the `Bunch` returned object; `Bunch` return object will also\n have a ``frame`` member.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (494021, 41)\n The data matrix to learn. 
If `as_frame=True`, `data` will be a\n pandas DataFrame.\n target : {ndarray, series} of shape (494021,)\n The regression target for each sample. If `as_frame=True`, `target`\n will be a pandas Series.\n frame : dataframe of shape (494021, 42)\n Only present when `as_frame=True`. Contains `data` and `target`.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20"
- },
- {
- "name": "_fetch_brute_kddcup99",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "percent10",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to load only 10 percent of the data."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Load the kddcup99 dataset, downloading it if necessary.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\npercent10 : bool, default=True\n Whether to load only 10 percent of the data.\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (494021, 41)\n Each row corresponds to the 41 features in the dataset.\n target : ndarray of shape (494021,)\n Each value corresponds to one of the 21 attack types or to the\n label 'normal.'.\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n DESCR : str\n Description of the kddcup99 dataset."
- },
- {
- "name": "_mkdirp",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Ensure directory d exists (like mkdir -p on Unix)\nNo guarantee that the directory is writable."
- }
- ]
- },
- {
- "name": "sklearn.datasets._lfw",
- "imports": [
- "from os import listdir",
- "from os import makedirs",
- "from os import remove",
- "from os.path import dirname",
- "from os.path import join",
- "from os.path import exists",
- "from os.path import isdir",
- "import logging",
- "import numpy as np",
- "import joblib",
- "from joblib import Memory",
- "from _base import get_data_home",
- "from _base import _fetch_remote",
- "from _base import RemoteFileMetadata",
- "from utils import Bunch",
- "from utils.validation import _deprecate_positional_args",
- "from utils.fixes import parse_version",
- "import tarfile",
- "from externals._pilutil import imread",
- "from externals._pilutil import imresize"
- ],
- "classes": [],
- "functions": [
- {
- "name": "_check_fetch_lfw",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Helper function to download any missing LFW data"
- },
- {
- "name": "_load_imgs",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Internally used to load images"
- },
- {
- "name": "_fetch_lfw_people",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform the actual data loading for the lfw people dataset\n\nThis operation is meant to be cached by a joblib wrapper."
- },
- {
- "name": "fetch_lfw_people",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "funneled",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Download and use the funneled variant of the dataset."
- },
- {
- "name": "resize",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Ratio used to resize the each face picture."
- },
- {
- "name": "min_faces_per_person",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The extracted dataset will only retain pictures of people that have at least `min_faces_per_person` different pictures."
- },
- {
- "name": "color",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False."
- },
- {
- "name": "slice_",
- "type": "Tuple[]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background"
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the Labeled Faces in the Wild (LFW) people dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 5749\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\nmin_faces_per_person : int, default=None\n The extracted dataset will only retain pictures of people that have at\n least `min_faces_per_person` different pictures.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. 
versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : numpy array of shape (13233, 2914)\n Each row corresponds to a ravelled face image\n of original size 62 x 47 pixels.\n Changing the ``slice_`` or resize parameters will change the\n shape of the output.\n images : numpy array of shape (13233, 62, 47)\n Each row is a face image corresponding to one of the 5749 people in\n the dataset. Changing the ``slice_``\n or resize parameters will change the shape of the output.\n target : numpy array of shape (13233,)\n Labels associated to each face image.\n Those labels range from 0-5748 and correspond to the person IDs.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20"
- },
- {
- "name": "_fetch_lfw_pairs",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Perform the actual data loading for the LFW pairs dataset\n\nThis operation is meant to be cached by a joblib wrapper."
- },
- {
- "name": "fetch_lfw_pairs",
- "decorators": [],
- "parameters": [
- {
- "name": "subset",
- "type": "Literal['train', 'test', '10_folds']",
- "hasDefault": true,
- "default": "'train'",
- "limitation": null,
- "ignored": false,
- "docstring": "Select the dataset to load: 'train' for the development training set, 'test' for the development test set, and '10_folds' for the official evaluation set that is meant to be used with a 10-folds cross validation."
- },
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "funneled",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Download and use the funneled variant of the dataset."
- },
- {
- "name": "resize",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Ratio used to resize the each face picture."
- },
- {
- "name": "color",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Keep the 3 RGB channels instead of averaging them to a single gray level channel. If color is True the shape of the data has one more dimension than the shape with color = False."
- },
- {
- "name": "slice_",
- "type": "Tuple[]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Provide a custom 2D slice (height, width) to extract the 'interesting' part of the jpeg files and avoid use statistical correlation from the background"
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsubset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. 
If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : string\n Description of the Labeled Faces in the Wild (LFW) dataset."
- }
- ]
- },
- {
- "name": "sklearn.datasets._olivetti_faces",
- "imports": [
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import join",
- "from os import makedirs",
- "from os import remove",
- "import numpy as np",
- "from scipy.io.matlab import loadmat",
- "import joblib",
- "from None import get_data_home",
- "from _base import _fetch_remote",
- "from _base import RemoteFileMetadata",
- "from _base import _pkl_filepath",
- "from utils import check_random_state",
- "from utils import Bunch",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "fetch_olivetti_faces",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True the order of the dataset is shuffled to avoid having images of the same person grouped."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns `(data, target)` instead of a `Bunch` object. See below for more information about the `data` and `target` object. .. versionadded:: 0.22"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the Olivetti faces data-set from AT&T (classification).\n\nDownload it if necessary.\n\n================= =====================\nClasses 40\nSamples total 400\nDimensionality 4096\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nshuffle : bool, default=False\n If True the order of the dataset is shuffled to avoid having\n images of the same person grouped.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nreturn_X_y : bool, default=False\n If True, returns `(data, target)` instead of a `Bunch` object. See\n below for more information about the `data` and `target` object.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data: ndarray, shape (400, 4096)\n Each row corresponds to a ravelled\n face image of original size 64 x 64 pixels.\n images : ndarray, shape (400, 64, 64)\n Each row is a face image\n corresponding to one of the 40 subjects of the dataset.\n target : ndarray, shape (400,)\n Labels associated to each face image.\n Those labels are ranging from 0-39 and correspond to the\n Subject IDs.\n DESCR : str\n Description of the modified Olivetti Faces Dataset.\n\n(data, target) : tuple if `return_X_y=True`\n .. versionadded:: 0.22"
- }
- ]
- },
- {
- "name": "sklearn.datasets._openml",
- "imports": [
- "import gzip",
- "import json",
- "import os",
- "import shutil",
- "import hashlib",
- "from os.path import join",
- "from warnings import warn",
- "from contextlib import closing",
- "from functools import wraps",
- "from typing import Callable",
- "from typing import Optional",
- "from typing import Dict",
- "from typing import Tuple",
- "from typing import List",
- "from typing import Any",
- "from typing import Union",
- "import itertools",
- "from collections.abc import Generator",
- "from collections import OrderedDict",
- "from functools import partial",
- "from urllib.request import urlopen",
- "from urllib.request import Request",
- "import numpy as np",
- "import scipy.sparse",
- "from externals import _arff",
- "from externals._arff import ArffSparseDataType",
- "from externals._arff import ArffContainerType",
- "from None import get_data_home",
- "from urllib.error import HTTPError",
- "from utils import Bunch",
- "from utils import get_chunk_n_rows",
- "from utils import _chunk_generator",
- "from utils import check_pandas_support",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [
- {
- "name": "OpenMLError",
- "decorators": [],
- "superclasses": [],
- "methods": [
- {
- "name": "__init__",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": ""
- }
- ],
- "docstring": "HTTP 412 is a specific OpenML error code, indicating a generic error"
- }
- ],
- "functions": [
- {
- "name": "_get_local_path",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_retry_with_clean_cache",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "If the first call to the decorated function fails, the local cached\nfile is removed, and the function is called again. If ``data_home`` is\n``None``, then the function is called once."
- },
- {
- "name": "_open_openml_url",
- "decorators": [],
- "parameters": [
- {
- "name": "openml_path",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "OpenML URL that will be accessed. This will be prefixes with _OPENML_PREFIX"
- },
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Directory to which the files will be cached. If None, no caching will be applied."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Returns a resource from OpenML.org. Caches it to data_home if required.\n\nParameters\n----------\nopenml_path : str\n OpenML URL that will be accessed. This will be prefixes with\n _OPENML_PREFIX\n\ndata_home : str\n Directory to which the files will be cached. If None, no caching will\n be applied.\n\nReturns\n-------\nresult : stream\n A stream to the OpenML resource"
- },
- {
- "name": "_get_json_content_from_openml_api",
- "decorators": [],
- "parameters": [
- {
- "name": "url",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The URL to load from. Should be an official OpenML endpoint"
- },
- {
- "name": "error_message",
- "type": "Optional[str]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The error message to raise if an acceptable OpenML error is thrown (acceptable error is, e.g., data id not found. Other errors, like 404's will throw the native error message)"
- },
- {
- "name": "data_home",
- "type": "Optional[str]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Location to cache the response. None if no cache is required."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Loads json data from the openml api\n\nParameters\n----------\nurl : str\n The URL to load from. Should be an official OpenML endpoint\n\nerror_message : str or None\n The error message to raise if an acceptable OpenML error is thrown\n (acceptable error is, e.g., data id not found. Other errors, like 404's\n will throw the native error message)\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\njson_data : json\n the json result from the OpenML server if the call was successful.\n An exception otherwise."
- },
- {
- "name": "_split_sparse_columns",
- "decorators": [],
- "parameters": [
- {
- "name": "arff_data",
- "type": "Tuple[]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A tuple of three lists of equal size; first list indicating the value, second the x coordinate and the third the y coordinate."
- },
- {
- "name": "include_columns",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "A list of columns to include."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "obtains several columns from sparse arff representation. Additionally, the\ncolumn indices are re-labelled, given the columns that are not included.\n(e.g., when including [1, 2, 3], the columns will be relabelled to\n[0, 1, 2])\n\nParameters\n----------\narff_data : tuple\n A tuple of three lists of equal size; first list indicating the value,\n second the x coordinate and the third the y coordinate.\n\ninclude_columns : list\n A list of columns to include.\n\nReturns\n-------\narff_data_new : tuple\n Subset of arff data with only the include columns indicated by the\n include_columns argument."
- },
- {
- "name": "_sparse_data_to_array",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_convert_arff_data",
- "decorators": [],
- "parameters": [
- {
- "name": "arff",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "As obtained from liac-arff object."
- },
- {
- "name": "col_slice_x",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The column indices that are sliced from the original array to return as X data"
- },
- {
- "name": "col_slice_y",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The column indices that are sliced from the original array to return as y data"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "converts the arff object into the appropriate matrix type (np.array or\nscipy.sparse.csr_matrix) based on the 'data part' (i.e., in the\nliac-arff dict, the object from the 'data' key)\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncol_slice_x : list\n The column indices that are sliced from the original array to return\n as X data\n\ncol_slice_y : list\n The column indices that are sliced from the original array to return\n as y data\n\nReturns\n-------\nX : np.array or scipy.sparse.csr_matrix\ny : np.array"
- },
- {
- "name": "_feature_to_dtype",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Map feature to dtype for pandas DataFrame\n "
- },
- {
- "name": "_convert_arff_data_dataframe",
- "decorators": [],
- "parameters": [
- {
- "name": "arff",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "As obtained from liac-arff object."
- },
- {
- "name": "columns",
- "type": "List",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Columns from dataframe to return."
- },
- {
- "name": "features_dict",
- "type": "Dict",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Maps feature name to feature info from openml."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Convert the ARFF object into a pandas DataFrame.\n\nParameters\n----------\narff : dict\n As obtained from liac-arff object.\n\ncolumns : list\n Columns from dataframe to return.\n\nfeatures_dict : dict\n Maps feature name to feature info from openml.\n\nReturns\n-------\nresult : tuple\n tuple with the resulting dataframe"
- },
- {
- "name": "_get_data_info_by_name",
- "decorators": [],
- "parameters": [
- {
- "name": "name",
- "type": "str",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "name of the dataset"
- },
- {
- "name": "version",
- "type": "Union[str, int]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "If version is an integer, the exact name/version will be obtained from OpenML. If version is a string (value: \"active\") it will take the first version from OpenML that is annotated as active. Any other string values except \"active\" are treated as integer."
- },
- {
- "name": "data_home",
- "type": "Optional[str]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Location to cache the response. None if no cache is required."
- }
- ],
- "hasReturnType": false,
- "returnType": null,
- "docstring": "Utilizes the openml dataset listing api to find a dataset by\nname/version\nOpenML api function:\nhttps://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name\n\nParameters\n----------\nname : str\n name of the dataset\n\nversion : int or str\n If version is an integer, the exact name/version will be obtained from\n OpenML. If version is a string (value: \"active\") it will take the first\n version from OpenML that is annotated as active. Any other string\n values except \"active\" are treated as integer.\n\ndata_home : str or None\n Location to cache the response. None if no cache is required.\n\nReturns\n-------\nfirst_dataset : json\n json representation of the first dataset object that adhired to the\n search criteria"
- },
- {
- "name": "_get_data_description_by_id",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_data_features",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_data_qualities",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_get_num_samples",
- "decorators": [],
- "parameters": [
- {
- "name": "data_qualities",
- "type": "List[Dict]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Used to retrieve the number of instances (samples) in the dataset."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Get the number of samples from data qualities.\n\nParameters\n----------\ndata_qualities : list of dict\n Used to retrieve the number of instances (samples) in the dataset.\n\nReturns\n-------\nn_samples : int\n The number of samples in the dataset or -1 if data qualities are\n unavailable."
- },
- {
- "name": "_load_arff_response",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load arff data with url and parses arff response with parse_arff"
- },
- {
- "name": "_download_data_to_bunch",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Download OpenML ARFF and convert to Bunch of data\n "
- },
- {
- "name": "_verify_target_data_type",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "_valid_data_column_names",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": null
- },
- {
- "name": "fetch_openml",
- "decorators": [],
- "parameters": [
- {
- "name": "name",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "String identifier of the dataset. Note that OpenML can have multiple datasets with the same name."
- },
- {
- "name": "version",
- "type": "Union[Literal['active'], int]",
- "hasDefault": true,
- "default": "'active'",
- "limitation": null,
- "ignored": false,
- "docstring": "Version of the dataset. Can only be provided if also ``name`` is given. If 'active' the oldest version that's still active is used. Since there may be more than one active version of a dataset, and those versions may fundamentally be different from one another, setting an exact version is highly recommended."
- },
- {
- "name": "data_id",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "OpenML ID of the dataset. The most specific way of retrieving a dataset. If data_id is not given, name (and potential version) are used to obtain a dataset."
- },
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the data sets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "target_column",
- "type": "Optional[Union[List, str]]",
- "hasDefault": true,
- "default": "'default-target'",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify the column name in the data to use as target. If 'default-target', the standard target column a stored on the server is used. If ``None``, all columns are returned as data and the target is ``None``. If list (of strings), all columns with these names are returned as multi-target (Note: not all scikit-learn classifiers can handle all types of multi-output combinations)"
- },
- {
- "name": "cache",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to cache downloaded datasets using joblib."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(data, target)`` instead of a Bunch object. See below for more information about the `data` and `target` objects."
- },
- {
- "name": "as_frame",
- "type": "Union[Literal['auto'], bool]",
- "hasDefault": true,
- "default": "'auto'",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the data is a pandas DataFrame including columns with appropriate dtypes (numeric, string or categorical). The target is a pandas DataFrame or Series depending on the number of target_columns. The Bunch will contain a ``frame`` attribute with the target and the data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas DataFrames or Series as describe above. If as_frame is 'auto', the data and target will be converted to DataFrame or Series as if as_frame is set to True, unless the dataset is stored in sparse format. .. versionchanged:: 0.24 The default value of `as_frame` changed from `False` to `'auto'` in 0.24."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\nversion : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\ndata_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the data sets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. 
If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations)\n\ncache : bool, default=True\n Whether to cache downloaded datasets using joblib.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset\n feature_names : list\n The names of the dataset columns\n target_names: list\n The names of the target columns\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. 
If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)"
- }
- ]
- },
- {
- "name": "sklearn.datasets._rcv1",
- "imports": [
- "import logging",
- "from os import remove",
- "from os import makedirs",
- "from os.path import dirname",
- "from os.path import exists",
- "from os.path import join",
- "from gzip import GzipFile",
- "import numpy as np",
- "import scipy.sparse as sp",
- "import joblib",
- "from None import get_data_home",
- "from _base import _pkl_filepath",
- "from _base import _fetch_remote",
- "from _base import RemoteFileMetadata",
- "from _svmlight_format_io import load_svmlight_files",
- "from utils import shuffle as shuffle_",
- "from utils import Bunch",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "fetch_rcv1",
- "decorators": [],
- "parameters": [
- {
- "name": "data_home",
- "type": "str",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders."
- },
- {
- "name": "subset",
- "type": "Literal['train', 'test', 'all']",
- "hasDefault": true,
- "default": "'all'",
- "limitation": null,
- "ignored": false,
- "docstring": "Select the dataset to load: 'train' for the training set (23149 samples), 'test' for the test set (781265 samples), 'all' for both, with the training samples first if shuffle is False. This follows the official LYRL2004 chronological split."
- },
- {
- "name": "download_if_missing",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to shuffle dataset."
- },
- {
- "name": "return_X_y",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch object. See below for more information about the `dataset.data` and `dataset.target` object. .. versionadded:: 0.20"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Load the RCV1 multilabel dataset (classification).\n\nDownload it if necessary.\n\nVersion: RCV1-v2, vectors, full sets, topics multilabels.\n\n================= =====================\nClasses 103\nSamples total 804414\nDimensionality 47236\nFeatures real, between 0 and 1\n================= =====================\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\nsubset : {'train', 'test', 'all'}, default='all'\n Select the dataset to load: 'train' for the training set\n (23149 samples), 'test' for the test set (781265 samples),\n 'all' for both, with the training samples first if shuffle is False.\n This follows the official LYRL2004 chronological split.\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nshuffle : bool, default=False\n Whether to shuffle dataset.\n\nreturn_X_y : bool, default=False\n If True, returns ``(dataset.data, dataset.target)`` instead of a Bunch\n object. See below for more information about the `dataset.data` and\n `dataset.target` object.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ndataset : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : sparse matrix of shape (804414, 47236), dtype=np.float64\n The array has 0.16% of non zero values. 
Will be of CSR format.\n target : sparse matrix of shape (804414, 103), dtype=np.uint8\n Each sample has a value of 1 in its categories, and 0 in others.\n The array has 3.15% of non zero values. Will be of CSR format.\n sample_id : ndarray of shape (804414,), dtype=np.uint32,\n Identification number of each sample, as ordered in dataset.data.\n target_names : ndarray of shape (103,), dtype=object\n Names of each target (RCV1 topics), as ordered in dataset.target.\n DESCR : str\n Description of the RCV1 dataset.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. versionadded:: 0.20"
- },
- {
- "name": "_inverse_permutation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Inverse permutation p."
- },
- {
- "name": "_find_permutation",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Find the permutation from a to b."
- }
- ]
- },
- {
- "name": "sklearn.datasets._samples_generator",
- "imports": [
- "import numbers",
- "import array",
- "from collections.abc import Iterable",
- "import numpy as np",
- "from scipy import linalg",
- "import scipy.sparse as sp",
- "from preprocessing import MultiLabelBinarizer",
- "from utils import check_array",
- "from utils import check_random_state",
- "from utils import shuffle as util_shuffle",
- "from utils.random import sample_without_replacement",
- "from utils.validation import _deprecate_positional_args"
- ],
- "classes": [],
- "functions": [
- {
- "name": "_generate_hypercube",
- "decorators": [],
- "parameters": [],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Returns distinct binary samples of length dimensions.\n "
- },
- {
- "name": "make_classification",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": true,
- "default": "20",
- "limitation": null,
- "ignored": false,
- "docstring": "The total number of features. These comprise ``n_informative`` informative features, ``n_redundant`` redundant features, ``n_repeated`` duplicated features and ``n_features-n_informative-n_redundant-n_repeated`` useless features drawn at random."
- },
- {
- "name": "n_informative",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of informative features. Each class is composed of a number of gaussian clusters each located around the vertices of a hypercube in a subspace of dimension ``n_informative``. For each cluster, informative features are drawn independently from N(0, 1) and then randomly linearly combined within each cluster in order to add covariance. The clusters are then placed on the vertices of the hypercube."
- },
- {
- "name": "n_redundant",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of redundant features. These features are generated as random linear combinations of the informative features."
- },
- {
- "name": "n_repeated",
- "type": "int",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of duplicated features, drawn randomly from the informative and the redundant features."
- },
- {
- "name": "n_classes",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of classes (or labels) of the classification problem."
- },
- {
- "name": "n_clusters_per_class",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of clusters per class."
- },
- {
- "name": "weights",
- "type": "ArrayLike",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The proportions of samples assigned to each class. If None, then classes are balanced. Note that if ``len(weights) == n_classes - 1``, then the last class weight is automatically inferred. More than ``n_samples`` samples may be returned if the sum of ``weights`` exceeds 1. Note that the actual class proportions will not exactly match ``weights`` when ``flip_y`` isn't 0."
- },
- {
- "name": "flip_y",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The fraction of samples whose class is assigned randomly. Larger values introduce noise in the labels and make the classification task harder. Note that the default setting flip_y > 0 might lead to less than ``n_classes`` in y in some cases."
- },
- {
- "name": "class_sep",
- "type": "float",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "The factor multiplying the hypercube size. Larger values spread out the clusters/classes and make the classification task easier."
- },
- {
- "name": "hypercube",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the clusters are put on the vertices of a hypercube. If False, the clusters are put on the vertices of a random polytope."
- },
- {
- "name": "shift",
- "type": "Union[NDArray, float]",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "Shift features by the specified value. If None, then features are shifted by a random value drawn in [-class_sep, class_sep]."
- },
- {
- "name": "scale",
- "type": "Union[NDArray, float]",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "Multiply features by the specified value. If None, then features are scaled by a random value drawn in [1, 100]. Note that scaling happens after shifting."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Shuffle the samples and the features."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate a random n-class classification problem.\n\nThis initially creates clusters of points normally distributed (std=1)\nabout vertices of an ``n_informative``-dimensional hypercube with sides of\nlength ``2*class_sep`` and assigns an equal number of clusters to each\nclass. It introduces interdependence between these features and adds\nvarious types of further noise to the data.\n\nWithout shuffling, ``X`` horizontally stacks features in the following\norder: the primary ``n_informative`` features, followed by ``n_redundant``\nlinear combinations of the informative features, followed by ``n_repeated``\nduplicates, drawn randomly with replacement from the informative and\nredundant features. The remaining features are filled with random noise.\nThus, without shuffling, all useful features are contained in the columns\n``X[:, :n_informative + n_redundant + n_repeated]``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features. These comprise ``n_informative``\n informative features, ``n_redundant`` redundant features,\n ``n_repeated`` duplicated features and\n ``n_features-n_informative-n_redundant-n_repeated`` useless features\n drawn at random.\n\nn_informative : int, default=2\n The number of informative features. Each class is composed of a number\n of gaussian clusters each located around the vertices of a hypercube\n in a subspace of dimension ``n_informative``. For each cluster,\n informative features are drawn independently from N(0, 1) and then\n randomly linearly combined within each cluster in order to add\n covariance. The clusters are then placed on the vertices of the\n hypercube.\n\nn_redundant : int, default=2\n The number of redundant features. 
These features are generated as\n random linear combinations of the informative features.\n\nn_repeated : int, default=0\n The number of duplicated features, drawn randomly from the informative\n and the redundant features.\n\nn_classes : int, default=2\n The number of classes (or labels) of the classification problem.\n\nn_clusters_per_class : int, default=2\n The number of clusters per class.\n\nweights : array-like of shape (n_classes,) or (n_classes - 1,), default=None\n The proportions of samples assigned to each class. If None, then\n classes are balanced. Note that if ``len(weights) == n_classes - 1``,\n then the last class weight is automatically inferred.\n More than ``n_samples`` samples may be returned if the sum of\n ``weights`` exceeds 1. Note that the actual class proportions will\n not exactly match ``weights`` when ``flip_y`` isn't 0.\n\nflip_y : float, default=0.01\n The fraction of samples whose class is assigned randomly. Larger\n values introduce noise in the labels and make the classification\n task harder. Note that the default setting flip_y > 0 might lead\n to less than ``n_classes`` in y in some cases.\n\nclass_sep : float, default=1.0\n The factor multiplying the hypercube size. Larger values spread\n out the clusters/classes and make the classification task easier.\n\nhypercube : bool, default=True\n If True, the clusters are put on the vertices of a hypercube. If\n False, the clusters are put on the vertices of a random polytope.\n\nshift : float, ndarray of shape (n_features,) or None, default=0.0\n Shift features by the specified value. If None, then features\n are shifted by a random value drawn in [-class_sep, class_sep].\n\nscale : float, ndarray of shape (n_features,) or None, default=1.0\n Multiply features by the specified value. If None, then features\n are scaled by a random value drawn in [1, 100]. 
Note that scaling\n happens after shifting.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for class membership of each sample.\n\nNotes\n-----\nThe algorithm is adapted from Guyon [1] and was designed to generate\nthe \"Madelon\" dataset.\n\nReferences\n----------\n.. [1] I. Guyon, \"Design of experiments for the NIPS 2003 variable\n selection benchmark\", 2003.\n\nSee Also\n--------\nmake_blobs : Simplified variant.\nmake_multilabel_classification : Unrelated generator for multilabel tasks."
- },
- {
- "name": "make_multilabel_classification",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": true,
- "default": "20",
- "limitation": null,
- "ignored": false,
- "docstring": "The total number of features."
- },
- {
- "name": "n_classes",
- "type": "int",
- "hasDefault": true,
- "default": "5",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of classes of the classification problem."
- },
- {
- "name": "n_labels",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The average number of labels per instance. More precisely, the number of labels per sample is drawn from a Poisson distribution with ``n_labels`` as its expected value, but samples are bounded (using rejection sampling) by ``n_classes``, and must be nonzero if ``allow_unlabeled`` is False."
- },
- {
- "name": "length",
- "type": "int",
- "hasDefault": true,
- "default": "50",
- "limitation": null,
- "ignored": false,
- "docstring": "The sum of the features (number of words if documents) is drawn from a Poisson distribution with this expected value."
- },
- {
- "name": "allow_unlabeled",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``True``, some instances might not belong to any class."
- },
- {
- "name": "sparse",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``True``, return a sparse feature matrix .. versionadded:: 0.17 parameter to allow *sparse* output."
- },
- {
- "name": "return_indicator",
- "type": "Literal['dense', 'sparse']",
- "hasDefault": true,
- "default": "'dense'",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``'dense'`` return ``Y`` in the dense binary indicator format. If ``'sparse'`` return ``Y`` in the sparse binary indicator format. ``False`` returns a list of lists of labels."
- },
- {
- "name": "return_distributions",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If ``True``, return the prior class probability and conditional probabilities of features given classes, from which the data was drawn."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features.\n\nn_classes : int, default=5\n The number of classes of the classification problem.\n\nn_labels : int, default=2\n The average number of labels per instance. More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\nlength : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\nallow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\nsparse : bool, default=False\n If ``True``, return a sparse feature matrix\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\nreturn_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. 
If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\nreturn_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\np_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\np_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``."
- },
- {
- "name": "make_hastie_10_2",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "12000",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generates data for binary classification used in\nHastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=12000\n The number of samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 10)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 2\", Springer, 2009.\n\nSee Also\n--------\nmake_gaussian_quantiles : A generalization of this dataset approach."
- },
- {
- "name": "make_regression",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of features."
- },
- {
- "name": "n_informative",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of informative features, i.e., the number of features used to build the linear model used to generate the output."
- },
- {
- "name": "n_targets",
- "type": "int",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar."
- },
- {
- "name": "bias",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The bias term in the underlying linear model."
- },
- {
- "name": "effective_rank",
- "type": "int",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "if not None: The approximate number of singular vectors required to explain most of the input data by linear combinations. Using this kind of singular spectrum in the input allows the generator to reproduce the correlations often observed in practice. if None: The input set is well conditioned, centered and gaussian with unit variance."
- },
- {
- "name": "tail_strength",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The relative importance of the fat noisy tail of the singular values profile if `effective_rank` is not None. When a float, it should be between 0 and 1."
- },
- {
- "name": "noise",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The standard deviation of the gaussian noise applied to the output."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Shuffle the samples and the features."
- },
- {
- "name": "coef",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, the coefficients of the underlying linear model are returned."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate a random regression problem.\n\nThe input set can either be well conditioned (by default) or have a low\nrank-fat tail singular profile. See :func:`make_low_rank_matrix` for\nmore details.\n\nThe output is generated by applying a (potentially biased) random linear\nregression model with `n_informative` nonzero regressors to the previously\ngenerated input and some gaussian centered noise with some adjustable\nscale.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\nn_informative : int, default=10\n The number of informative features, i.e., the number of features used\n to build the linear model used to generate the output.\n\nn_targets : int, default=1\n The number of regression targets, i.e., the dimension of the y output\n vector associated with a sample. By default, the output is a scalar.\n\nbias : float, default=0.0\n The bias term in the underlying linear model.\n\neffective_rank : int, default=None\n if not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\n if None:\n The input set is well conditioned, centered and gaussian with\n unit variance.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile if `effective_rank` is not None. 
When a float, it should be\n between 0 and 1.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nshuffle : bool, default=True\n Shuffle the samples and the features.\n\ncoef : bool, default=False\n If True, the coefficients of the underlying linear model are returned.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,) or (n_samples, n_targets)\n The output values.\n\ncoef : ndarray of shape (n_features,) or (n_features, n_targets)\n The coefficient of the underlying linear model. It is returned only if\n coef is True."
- },
- {
- "name": "make_circles",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "Union[Tuple[], int]",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "If int, it is the total number of points generated. For odd numbers, the inner circle will have one point more than the outer circle. If two-element tuple, number of points in outer circle and inner circle. .. versionchanged:: 0.23 Added two-element tuple."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to shuffle the samples."
- },
- {
- "name": "noise",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Standard deviation of Gaussian noise added to the data."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "factor",
- "type": "float",
- "hasDefault": true,
- "default": ".",
- "limitation": null,
- "ignored": false,
- "docstring": "Scale factor between inner and outer circle in the range `(0, 1)`."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Make a large circle containing a smaller circle in 2d.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, it is the total number of points generated.\n For odd numbers, the inner circle will have one point more than the\n outer circle.\n If two-element tuple, number of points in outer circle and inner\n circle.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nfactor : float, default=.8\n Scale factor between inner and outer circle in the range `(0, 1)`.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample."
- },
- {
- "name": "make_moons",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "Union[Tuple[], int]",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "If int, the total number of points generated. If two-element tuple, number of points in each of two moons. .. versionchanged:: 0.23 Added two-element tuple."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Whether to shuffle the samples."
- },
- {
- "name": "noise",
- "type": "float",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Standard deviation of Gaussian noise added to the data."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Make two interleaving half circles.\n\nA simple toy dataset to visualize clustering and classification\nalgorithms. Read more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or tuple of shape (2,), dtype=int, default=100\n If int, the total number of points generated.\n If two-element tuple, number of points in each of two moons.\n\n .. versionchanged:: 0.23\n Added two-element tuple.\n\nshuffle : bool, default=True\n Whether to shuffle the samples.\n\nnoise : float, default=None\n Standard deviation of Gaussian noise added to the data.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset shuffling and noise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 2)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels (0 or 1) for class membership of each sample."
- },
- {
- "name": "make_blobs",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "Union[ArrayLike, int]",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "If int, it is the total number of points equally divided among clusters. If array-like, each element of the sequence indicates the number of samples per cluster. .. versionchanged:: v0.20 one can now pass an array-like to the ``n_samples`` parameter"
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": true,
- "default": "2",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of features for each sample."
- },
- {
- "name": "centers",
- "type": "Union[NDArray, int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of centers to generate, or the fixed center locations. If n_samples is an int and centers is None, 3 centers are generated. If n_samples is array-like, centers must be either None or an array of length equal to the length of n_samples."
- },
- {
- "name": "cluster_std",
- "type": "Union[ArrayLike, float]",
- "hasDefault": true,
- "default": "1",
- "limitation": null,
- "ignored": false,
- "docstring": "The standard deviation of the clusters."
- },
- {
- "name": "center_box",
- "type": "Tuple[float]",
- "hasDefault": false,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "The bounding box for each cluster center when centers are generated at random."
- },
- {
- "name": "shuffle",
- "type": "bool",
- "hasDefault": true,
- "default": "True",
- "limitation": null,
- "ignored": false,
- "docstring": "Shuffle the samples."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- },
- {
- "name": "return_centers",
- "type": "bool",
- "hasDefault": true,
- "default": "False",
- "limitation": null,
- "ignored": false,
- "docstring": "If True, then return the centers of each cluster .. versionadded:: 0.23"
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate isotropic Gaussian blobs for clustering.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like, default=100\n If int, it is the total number of points equally divided among\n clusters.\n If array-like, each element of the sequence indicates\n the number of samples per cluster.\n\n .. versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter\n\nn_features : int, default=2\n The number of features for each sample.\n\ncenters : int or ndarray of shape (n_centers, n_features), default=None\n The number of centers to generate, or the fixed center locations.\n If n_samples is an int and centers is None, 3 centers are generated.\n If n_samples is array-like, centers must be\n either None or an array of length equal to the length of n_samples.\n\ncluster_std : float or array-like of float, default=1.0\n The standard deviation of the clusters.\n\ncenter_box : tuple of float (min, max), default=(-10.0, 10.0)\n The bounding box for each cluster center when centers are\n generated at random.\n\nshuffle : bool, default=True\n Shuffle the samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nreturn_centers : bool, default=False\n If True, then return the centers of each cluster\n\n .. versionadded:: 0.23\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\ny : ndarray of shape (n_samples,)\n The integer labels for cluster membership of each sample.\n\ncenters : ndarray of shape (n_centers, n_features)\n The centers of each cluster. Only returned if\n ``return_centers=True``.\n\nExamples\n--------\n>>> from sklearn.datasets import make_blobs\n>>> X, y = make_blobs(n_samples=10, centers=3, n_features=2,\n... 
random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])\n>>> X, y = make_blobs(n_samples=[3, 3, 4], centers=None, n_features=2,\n... random_state=0)\n>>> print(X.shape)\n(10, 2)\n>>> y\narray([0, 1, 2, 0, 2, 2, 2, 1, 1, 0])\n\nSee Also\n--------\nmake_classification : A more intricate variant."
- },
- {
- "name": "make_friedman1",
- "decorators": [],
- "parameters": [
- {
- "name": "n_samples",
- "type": "int",
- "hasDefault": true,
- "default": "100",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of samples."
- },
- {
- "name": "n_features",
- "type": "int",
- "hasDefault": true,
- "default": "10",
- "limitation": null,
- "ignored": false,
- "docstring": "The number of features. Should be at least 5."
- },
- {
- "name": "noise",
- "type": "float",
- "hasDefault": true,
- "default": "0",
- "limitation": null,
- "ignored": false,
- "docstring": "The standard deviation of the gaussian noise applied to the output."
- },
- {
- "name": "random_state",
- "type": "Optional[int]",
- "hasDefault": true,
- "default": "None",
- "limitation": null,
- "ignored": false,
- "docstring": "Determines random number generation for dataset noise. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `."
- }
- ],
- "hasReturnType": false,
- "returnType": "Any",
- "docstring": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features. Should be at least 5.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary